aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-03-28 21:34:14 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2024-03-31 20:36:12 -0400
commit13c1e583f9179ad7953dc71ebb2f12e613b9d052 (patch)
treea76d2d7df5caac4cbb063c02b66084e6a44f0044 /fs
parent060ff30a8596b649a80c19935758000dde7855fe (diff)
downloadlinux-13c1e583f9179ad7953dc71ebb2f12e613b9d052.tar.gz
bcachefs: Improve -o norecovery; opts.recovery_pass_limit
This adds opts.recovery_pass_limit, and redoes -o norecovery to make use of it; this fixes some issues with -o norecovery so it can be safely used for data recovery. Norecovery means "don't do journal replay"; it's an important data recovery tool when we're getting stuck in journal replay. When using it this way we need to make sure we don't free journal keys after startup, so we continue to overlay them: thus it needs to imply retain_recovery_info, as well as nochanges. recovery_pass_limit is an explicit option for telling recovery to exit after a specific recovery pass; this is a much cleaner way of implementing -o norecovery, as well as being a useful debug feature in its own right. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/opts.c4
-rw-r--r--fs/bcachefs/opts.h7
-rw-r--r--fs/bcachefs/recovery.c10
-rw-r--r--fs/bcachefs/recovery_passes.c12
-rw-r--r--fs/bcachefs/snapshot.c2
-rw-r--r--fs/bcachefs/super.c5
6 files changed, 26 insertions, 14 deletions
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 08ea0cfc4aef08..e1800c4119b5fb 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -7,6 +7,7 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
+#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
@@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
+#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
+ .min = 0, .max = U64_MAX, \
+ .choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 136083c11f3a3a..084247c13bd8fd 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -362,7 +362,12 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't replay the journal") \
+ NULL, "Exit recovery immediately prior to journal replay")\
+ x(recovery_pass_last, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_STR_NOLIMIT(bch2_recovery_passes), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Exit recovery after specified pass") \
x(keep_journal, u8, \
0, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index e3b06430d6067b..e8b43400929362 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -269,7 +269,8 @@ int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
- if (!c->opts.keep_journal)
+ if (!c->opts.keep_journal &&
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@@ -584,11 +585,8 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (c->opts.fsck && c->opts.norecovery) {
- bch_err(c, "cannot select both norecovery and fsck");
- ret = -EINVAL;
- goto err;
- }
+ if (c->opts.norecovery)
+ c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index fb22cce10f6683..0089d9456b1006 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -27,7 +27,7 @@ const char * const bch2_recovery_passes[] = {
static int bch2_check_allocations(struct bch_fs *c)
{
- return bch2_gc(c, true, c->opts.norecovery);
+ return bch2_gc(c, true, false);
}
static int bch2_set_may_go_rw(struct bch_fs *c)
@@ -144,8 +144,6 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
{
struct recovery_pass_fn *p = recovery_pass_fns + pass;
- if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
- return false;
if (c->recovery_passes_explicit & BIT_ULL(pass))
return true;
if ((p->when & PASS_FSCK) && c->opts.fsck)
@@ -201,6 +199,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
int ret = 0;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ if (c->opts.recovery_pass_last &&
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
+ break;
+
if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
unsigned pass = c->curr_recovery_pass;
@@ -213,8 +215,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
}
- c->curr_recovery_pass++;
+
c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+ c->curr_recovery_pass++;
}
return ret;
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 4e074136c49079..4577ee7939a2a5 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -131,7 +131,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 89ee5d3ec11963..bc026a77eb99d4 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -365,7 +365,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
- !c->opts.norecovery) {
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -510,7 +510,8 @@ err:
int bch2_fs_read_write(struct bch_fs *c)
{
- if (c->opts.norecovery)
+ if (c->opts.recovery_pass_last &&
+ c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)