aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-11-17 14:36:58 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-11-17 14:36:58 -0800
commit791c8ab095f71327899023223940dd52257a4173 (patch)
tree6930e073e2941693348c551d3092729dca1132d2
parent12ee72fe01e45a9586b9d130c5501763818c8efc (diff)
parentba276ce5865b5a22ee96c4c5664bfefd9c1bb593 (diff)
downloadlinux-rcu-791c8ab095f71327899023223940dd52257a4173.tar.gz
Merge tag 'bcachefs-2023-11-17' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet: "Lots of small fixes for minor nits and compiler warnings. Bigger items: - The six locks lost wakeup is finally fixed: six_read_trylock() was checking for the waiting bit before decrementing the number of readers - validated the fix with a torture test. - Fix for a memory reclaim issue: when needing to reallocate a key cache key, we now do our usual GFP_NOWAIT; unlock(); GFP_KERNEL dance. - Multiple deleted inodes btree fixes - Fix an issue in fsck, where i_nlink would be recalculated incorrectly for hardlinked files if a snapshot had ever been taken. - Kill journal pre-reservations: This is a bigger patch than I would normally send at this point, but it deletes code and it fixes some of our tests that would sporadically die with the journal getting stuck, and it's a performance improvement, too" * tag 'bcachefs-2023-11-17' of https://evilpiepirate.org/git/bcachefs: (22 commits) bcachefs: Fix missing locking for dentry->d_parent access bcachefs: six locks: Fix lost wakeup bcachefs: Fix no_data_io mode checksum check bcachefs: Fix bch2_check_nlinks() for snapshots bcachefs: Don't decrease BTREE_ITER_MAX when LOCKDEP=y bcachefs: Disable debug log statements bcachefs: Fix missing transaction commit bcachefs: Fix error path in bch2_mount() bcachefs: Fix potential sleeping during mount bcachefs: Fix iterator leak in may_delete_deleted_inode() bcachefs: Kill journal pre-reservations bcachefs: Check for nonce offset inconsistency in data_update path bcachefs: Make sure to drop/retake btree locks before reclaim bcachefs: btree_trans->write_locked bcachefs: Run btree key cache shrinker less aggressively bcachefs: Split out btree_key_cache_types.h bcachefs: Guard against insufficient devices to create stripes bcachefs: Fix null ptr deref in bch2_backpointer_get_node() bcachefs: Fix multiple -Warray-bounds warnings bcachefs: Use DECLARE_FLEX_ARRAY() helper and fix multiple -Warray-bounds warnings ...
-rw-r--r--fs/bcachefs/backpointers.c10
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_iter.c2
-rw-r--r--fs/bcachefs/btree_key_cache.c37
-rw-r--r--fs/bcachefs/btree_key_cache_types.h34
-rw-r--r--fs/bcachefs/btree_trans_commit.c169
-rw-r--r--fs/bcachefs/btree_types.h35
-rw-r--r--fs/bcachefs/btree_update_interior.c30
-rw-r--r--fs/bcachefs/btree_update_interior.h1
-rw-r--r--fs/bcachefs/data_update.c28
-rw-r--r--fs/bcachefs/disk_groups.c4
-rw-r--r--fs/bcachefs/ec.c16
-rw-r--r--fs/bcachefs/fs-io-pagecache.c2
-rw-r--r--fs/bcachefs/fs-io-pagecache.h2
-rw-r--r--fs/bcachefs/fs.c8
-rw-r--r--fs/bcachefs/fsck.c2
-rw-r--r--fs/bcachefs/inode.c8
-rw-r--r--fs/bcachefs/io_write.c2
-rw-r--r--fs/bcachefs/journal.c31
-rw-r--r--fs/bcachefs/journal.h98
-rw-r--r--fs/bcachefs/journal_io.c7
-rw-r--r--fs/bcachefs/journal_reclaim.c42
-rw-r--r--fs/bcachefs/journal_types.h26
-rw-r--r--fs/bcachefs/six.c7
-rw-r--r--fs/bcachefs/subvolume_types.h2
-rw-r--r--fs/bcachefs/trace.h11
-rw-r--r--fs/bcachefs/xattr.c9
27 files changed, 248 insertions, 377 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index ef02c9bb035417..23c0834a97a4ac 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
bp.level - 1,
0);
b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR(b))
+ if (IS_ERR_OR_NULL(b))
goto err;
BUG_ON(b->c.level != bp.level - 1);
- if (b && extent_matches_bp(c, bp.btree_id, bp.level,
- bkey_i_to_s_c(&b->key),
- bucket, bp))
+ if (extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
return b;
- if (b && btree_node_will_make_reachable(b)) {
+ if (btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 9cb8684959ee17..403aa3389fccfd 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -617,7 +617,7 @@ struct journal_seq_blacklist_table {
u64 start;
u64 end;
bool dirty;
- } entries[0];
+ } entries[];
};
struct journal_keys {
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index c2adf3fbb0b3ab..6fa90bcd701682 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
}
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
kfree(trans->extra_journal_entries.data);
if (trans->fs_usage_deltas) {
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9b78f78a75b59c..37fbf22de8fcba 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
ck->btree_trans_barrier_seq =
start_poll_synchronize_srcu(&c->btree_trans_barrier);
- if (ck->c.lock.readers)
+ if (ck->c.lock.readers) {
list_move_tail(&ck->list, &bc->freed_pcpu);
- else
+ bc->nr_freed_pcpu++;
+ } else {
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
+ }
atomic_long_inc(&bc->nr_freed);
kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
{
struct bkey_cached *pos;
+ bc->nr_freed_nonpcpu++;
+
list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
#else
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
mutex_unlock(&bc->lock);
#endif
} else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
f->nr < ARRAY_SIZE(f->objs) / 2) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
f->objs[f->nr++] = ck;
}
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (!list_empty(&bc->freed_nonpcpu)) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
}
mutex_unlock(&bc->lock);
#endif
@@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
goto out;
bch2_journal_pin_drop(j, &ck->journal);
- bch2_journal_preres_put(j, &ck->res);
BUG_ON(!btree_node_locked(c_iter.path, 0));
@@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
BUG_ON(insert->k.u64s > ck->u64s);
- if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
- int difference;
-
- BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
- difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
- if (difference > 0) {
- trans->journal_preres.u64s -= difference;
- ck->res.u64s += difference;
- }
- }
-
bkey_copy(ck->k, insert);
ck->valid = true;
@@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
+ scanned += bc->nr_freed_nonpcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_nonpcpu--;
}
if (scanned >= nr)
goto out;
+ scanned += bc->nr_freed_pcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_pcpu--;
}
if (scanned >= nr)
@@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
}
#endif
+ BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+ BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
list_splice(&bc->freed_pcpu, &items);
list_splice(&bc->freed_nonpcpu, &items);
@@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal);
- bch2_journal_preres_put(&c->journal, &ck->res);
list_del(&ck->list);
kfree(ck->k);
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
new file mode 100644
index 00000000000000..290e4e57df5bbc
--- /dev/null
+++ b/fs/bcachefs/btree_key_cache_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+ struct bkey_cached *objs[16];
+ unsigned nr;
+};
+
+struct btree_key_cache {
+ struct mutex lock;
+ struct rhashtable table;
+ bool table_init_done;
+
+ struct list_head freed_pcpu;
+ size_t nr_freed_pcpu;
+ struct list_head freed_nonpcpu;
+ size_t nr_freed_nonpcpu;
+
+ struct shrinker *shrink;
+ unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+ atomic_long_t nr_freed;
+ atomic_long_t nr_keys;
+ atomic_long_t nr_dirty;
+};
+
+struct bkey_cached_key {
+ u32 btree_id;
+ struct bpos pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index decad7b66c59c1..12907beda98c2b 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
bch2_btree_init_next(trans, b);
}
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+ while (--i >= trans->updates) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ EBUG_ON(trans->write_locked);
+
+ trans_for_each_update(trans, i) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+ return trans_lock_write_fail(trans, i);
+
+ if (!i->cached)
+ bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trans->write_locked = true;
+ return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+ if (likely(trans->write_locked)) {
+ struct btree_insert_entry *i;
+
+ trans_for_each_update(trans, i)
+ if (!same_leaf_as_prev(trans, i))
+ bch2_btree_node_unlock_write_inlined(trans, i->path,
+ insert_l(i)->b);
+ trans->write_locked = false;
+ }
+}
+
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
@@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
- unsigned long trace_ip)
-{
- return drop_locks_do(trans,
- bch2_journal_preres_get(&trans->c->journal,
- &trans->journal_preres,
- trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)));
-}
-
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
@@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
return 0;
}
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+ struct btree_path *path, unsigned new_u64s)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ struct bkey_cached *ck = (void *) path->l[0].b;
+ struct bkey_i *new_k;
+ int ret;
+
+ bch2_trans_unlock_write(trans);
+ bch2_trans_unlock(trans);
+
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+ if (!new_k) {
+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch2_btree_id_str(path->btree_id), new_u64s);
+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+ }
+
+ ret = bch2_trans_relock(trans) ?:
+ bch2_trans_lock_write(trans);
+ if (unlikely(ret)) {
+ kfree(new_k);
+ return ret;
+ }
+
+ memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+ trans_for_each_update(trans, i)
+ if (i->old_v == &ck->k->v)
+ i->old_v = &new_k->v;
+
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return 0;
+}
+
static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
struct btree_path *path, unsigned u64s)
{
@@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
return 0;
new_u64s = roundup_pow_of_two(u64s);
- new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
- if (!new_k) {
- bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
- bch2_btree_id_str(path->btree_id), new_u64s);
- return -BCH_ERR_ENOMEM_btree_key_cache_insert;
- }
+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+ if (unlikely(!new_k))
+ return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
trans_for_each_update(trans, i)
if (i->old_v == &ck->k->v)
@@ -732,37 +804,6 @@ revert_fs_usage:
return ret;
}
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
- while (--i >= trans->updates) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
- }
-
- trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
- struct btree_insert_entry *i;
-
- trans_for_each_update(trans, i) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
- return trans_lock_write_fail(trans, i);
-
- if (!i->cached)
- bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
- }
-
- return 0;
-}
-
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{
struct btree_insert_entry *i;
@@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
}
}
- ret = bch2_journal_preres_get(&c->journal,
- &trans->journal_preres, trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
- if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
- ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
- if (unlikely(ret))
- return ret;
-
- ret = trans_lock_write(trans);
+ ret = bch2_trans_lock_write(trans);
if (unlikely(ret))
return ret;
@@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
- trans_for_each_update(trans, i)
- if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_unlock_write_inlined(trans, i->path,
- insert_l(i)->b);
+ bch2_trans_unlock_write(trans);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
struct btree_write_buffered_key *wb;
- unsigned u64s;
int ret = 0;
if (!trans->nr_updates &&
@@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
- memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
trans->journal_u64s = trans->extra_journal_entries.nr;
- trans->journal_preres_u64s = 0;
-
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
@@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
if (i->key_cache_already_flushed)
continue;
- /* we're going to journal the key being updated: */
- u64s = jset_u64s(i->k->k.u64s);
- if (i->cached &&
- likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
- trans->journal_preres_u64s += u64s;
-
if (i->flags & BTREE_UPDATE_NOJOURNAL)
continue;
- trans->journal_u64s += u64s;
+ /* we're going to journal the key being updated: */
+ trans->journal_u64s += jset_u64s(i->k->k.u64s);
/* and we're also going to log the overwrite: */
if (trans->journal_transaction_names)
@@ -1126,8 +1145,6 @@ retry:
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 941841a0c5bf68..60453ba86c4b96 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -5,7 +5,7 @@
#include <linux/list.h>
#include <linux/rhashtable.h>
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
@@ -312,31 +312,6 @@ struct btree_iter {
#endif
};
-struct btree_key_cache_freelist {
- struct bkey_cached *objs[16];
- unsigned nr;
-};
-
-struct btree_key_cache {
- struct mutex lock;
- struct rhashtable table;
- bool table_init_done;
- struct list_head freed_pcpu;
- struct list_head freed_nonpcpu;
- struct shrinker *shrink;
- unsigned shrink_iter;
- struct btree_key_cache_freelist __percpu *pcpu_freed;
-
- atomic_long_t nr_freed;
- atomic_long_t nr_keys;
- atomic_long_t nr_dirty;
-};
-
-struct bkey_cached_key {
- u32 btree_id;
- struct bpos pos;
-} __packed __aligned(4);
-
#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1
@@ -352,7 +327,6 @@ struct bkey_cached {
struct rhash_head hash;
struct list_head list;
- struct journal_preres res;
struct journal_entry_pin journal;
u64 seq;
@@ -389,11 +363,7 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -434,6 +404,7 @@ struct btree_trans {
bool journal_transaction_names:1;
bool journal_replay_not_finished:1;
bool notrace_relock_fail:1;
+ bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_begin_ip;
@@ -465,11 +436,9 @@ struct btree_trans {
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
- struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned journal_u64s;
- unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
};
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 39c2db68123bd1..76f27bc9fa24e0 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
up_read(&c->gc_lock);
as->took_gc_lock = false;
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ err:
bch2_journal_pin_drop(&c->journal, &as->journal);
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
mutex_lock(&c->btree_interior_update_lock);
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned nr_nodes[2] = { 0, 0 };
unsigned update_level = level;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned journal_flags = 0;
int ret = 0;
u32 restart_count = trans->restart_count;
@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
- journal_flags |= watermark;
-
while (1) {
nr_nodes[!!update_level] += 1 + split;
update_level++;
@@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags|JOURNAL_RES_GET_NONBLOCK);
- if (ret) {
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
- goto err;
- }
-
- ret = drop_locks_do(trans,
- bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags));
- if (ret == -BCH_ERR_journal_preres_get_blocked) {
- trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
- }
- if (ret)
- goto err;
- }
-
ret = bch2_disk_reservation_get(c, &as->disk_res,
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
c->opts.metadata_replicas,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 4df21512d640da..031076e75fa132 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -55,7 +55,6 @@ struct btree_update {
unsigned update_level;
struct disk_reservation disk_res;
- struct journal_preres journal_preres;
/*
* BTREE_INTERIOR_UPDATING_NODE:
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0771a6d880bf5e..5ed66202c22654 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -239,6 +239,34 @@ restart_drop_extra_replicas:
next_pos = insert->k.p;
+ /*
+ * Check for nonce offset inconsistency:
+ * This is debug code - we've been seeing this bug rarely, and
+ * it's been hard to reproduce, so this should give us some more
+ * information when it does occur:
+ */
+ struct printbuf err = PRINTBUF;
+ int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+ printbuf_exit(&err);
+
+ if (invalid) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "about to insert invalid key in data update path");
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ goto out;
+ }
+
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c
index d613695abf9f67..4d0cb0ccff32f2 100644
--- a/fs/bcachefs/disk_groups.c
+++ b/fs/bcachefs/disk_groups.c
@@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
case TARGET_DEV: {
struct bch_dev *ca;
+ out->atomic++;
rcu_read_lock();
ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
@@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
}
rcu_read_unlock();
+ out->atomic--;
break;
}
case TARGET_GROUP:
@@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
}
}
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
{
struct target t = target_decode(v);
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 875f7c5a6fca63..2a77de18c004e7 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
h->nr_active_devs++;
rcu_read_unlock();
+
+ /*
+ * If we only have redundancy + 1 devices, we're better off with just
+ * replication:
+ */
+ if (h->nr_active_devs < h->redundancy + 2)
+ bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+ h->nr_active_devs, h->redundancy + 2);
+
list_add(&h->list, &c->ec_stripe_head_list);
return h;
}
@@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
found:
+ if (!IS_ERR_OR_NULL(h) &&
+ h->nr_active_devs < h->redundancy + 2) {
+ mutex_unlock(&h->lock);
+ h = NULL;
+ }
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
@@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
int ret;
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
- if (!h)
- bch_err(c, "no stripe head");
if (IS_ERR_OR_NULL(h))
return h;
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index 8bd9bcdd27f738..ff664fd0d8ef80 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -13,7 +13,7 @@
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
loff_t start, u64 end,
- int fgp_flags, gfp_t gfp,
+ fgf_t fgp_flags, gfp_t gfp,
folios *fs)
{
struct folio *f;
diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
index a2222ad586e9e7..27f712ae37a682 100644
--- a/fs/bcachefs/fs-io-pagecache.h
+++ b/fs/bcachefs/fs-io-pagecache.h
@@ -7,7 +7,7 @@
typedef DARRAY(struct folio *) folios;
int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
- u64, int, gfp_t, folios *);
+ u64, fgf_t, gfp_t, folios *);
int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
/*
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 166d8d8abe683f..8ef817304e4a2e 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1922,10 +1922,7 @@ out:
return dget(sb->s_root);
err_put_super:
- sb->s_fs_info = NULL;
- c->vfs_sb = NULL;
deactivate_locked_super(sb);
- bch2_fs_stop(c);
return ERR_PTR(bch2_err_class(ret));
}
@@ -1933,11 +1930,8 @@ static void bch2_kill_sb(struct super_block *sb)
{
struct bch_fs *c = sb->s_fs_info;
- if (c)
- c->vfs_sb = NULL;
generic_shutdown_super(sb);
- if (c)
- bch2_fs_free(c);
+ bch2_fs_free(c);
}
static struct file_system_type bcache_fs_type = {
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9f3e9bd3d767a7..e0c5cd119acc93 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r)
const struct nlink *l = _l;
const struct nlink *r = _r;
- return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+ return cmp_int(l->inum, r->inum);
}
static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index def77f2d88024b..c7849b0753e7a1 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
* unlinked inodes in the snapshot leaves:
*/
*need_another_pass = true;
- return 0;
+ goto out;
}
ret = 1;
@@ -1169,8 +1169,10 @@ again:
*/
for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
- &need_another_pass));
+ ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW,
+ may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
if (ret < 0)
break;
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index f02b3f7d26a016..d704a8f829c8a7 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
* checksum:
*/
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
- if (bch2_crc_cmp(op->crc.csum, csum))
+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return -EIO;
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 5b5d69f2316b21..23a9b7845d1197 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
return ret;
}
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
- if (!ret && mutex_trylock(&j->reclaim_lock)) {
- bch2_journal_reclaim(j);
- mutex_unlock(&j->reclaim_lock);
- }
-
- return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- int ret;
-
- closure_wait_event(&j->preres_wait,
- (ret = bch2_journal_error(j)) ||
- journal_preres_available(j, res, new_u64s, flags));
- return ret;
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *j,
@@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
- prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 011711e99c8d82..c85d01cf494849 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -395,104 +395,6 @@ out:
return 0;
}
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
- union journal_preres_state s = READ_ONCE(j->prereserved);
- unsigned watermark = BCH_WATERMARK_stripe;
-
- if (fifo_free(&j->pin) < j->pin.size / 4)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (fifo_free(&j->pin) < j->pin.size / 8)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (s.reserved > s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (!s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (watermark == j->watermark)
- return;
-
- swap(watermark, j->watermark);
- if (watermark > j->watermark)
- journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
- struct journal_preres *res)
-{
- union journal_preres_state s = { .reserved = res->u64s };
-
- if (!res->u64s)
- return;
-
- s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
- res->u64s = 0;
-
- if (unlikely(s.waiting)) {
- clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
- (unsigned long *) &j->prereserved.v);
- closure_wake_up(&j->preres_wait);
- }
-
- if (s.reserved <= s.remaining && j->watermark)
- journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
- struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags,
- bool set_waiting)
-{
- int d = new_u64s - res->u64s;
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
- enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- int ret;
-
- do {
- old.v = new.v = v;
- ret = 0;
-
- if (watermark == BCH_WATERMARK_reclaim ||
- new.reserved + d < new.remaining) {
- new.reserved += d;
- ret = 1;
- } else if (set_waiting && !new.waiting)
- new.waiting = true;
- else
- return 0;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
-
- if (ret)
- res->u64s += d;
- return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- if (new_u64s <= res->u64s)
- return 0;
-
- if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
- return 0;
-
- if (flags & JOURNAL_RES_GET_NONBLOCK)
- return -BCH_ERR_journal_preres_get_blocked;
-
- return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index f4bc2cdbfdd792..786a0928550920 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1079,6 +1079,12 @@ found:
if (ja->bucket_seq[ja->cur_idx] &&
ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+ /*
+ * Debug code for ZNS support, where we (probably) want to be
+ * correlated where we stopped in the journal to the zone write
+ * points:
+ */
bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
for (i = 0; i < 3; i++) {
@@ -1086,6 +1092,7 @@ found:
bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
}
+#endif
ja->sectors_free = 0;
}
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 9a584aaaa2eba9..e63c6eda86afeb 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
return available;
}
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
{
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
+ unsigned watermark = BCH_WATERMARK_stripe;
- do {
- old.v = new.v = v;
- new.remaining = u64s_remaining;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
+ if (low_on_space)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+ if (fifo_free(&j->pin) < j->pin.size / 4)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+ if (watermark == j->watermark)
+ return;
+
+ swap(watermark, j->watermark);
+ if (watermark > j->watermark)
+ journal_wake(j);
}
static struct journal_space
@@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned clean, clean_ondisk, total;
- s64 u64s_remaining = 0;
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
j->buf[1].buf_size >> 9);
unsigned i, nr_online = 0, nr_devs_want;
@@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
else
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
- u64s_remaining = (u64) clean << 6;
- u64s_remaining -= (u64) total << 3;
- u64s_remaining = max(0LL, u64s_remaining);
- u64s_remaining /= 4;
- u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+ journal_set_watermark(j, clean * 4 <= total);
out:
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
j->cur_entry_error = ret;
- journal_set_remaining(j, u64s_remaining);
- journal_set_watermark(j);
if (!ret)
journal_wake(j);
@@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
/* Try to keep the journal at most half full: */
nr_buckets = ja->nr / 2;
- /* And include pre-reservations: */
- nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
- (ca->mi.bucket_size << 6) -
- journal_entry_overhead(j));
-
nr_buckets = min(nr_buckets, ja->nr);
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
@@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
min_nr = 1;
- if (j->prereserved.reserved * 4 > j->prereserved.remaining)
- min_nr = 1;
-
- if (fifo_free(&j->pin) <= 32)
+ if (j->watermark != BCH_WATERMARK_stripe)
min_nr = 1;
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
@@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
trace_and_count(c, journal_reclaim_start, c,
direct, kicked,
min_nr, min_key_cache,
- j->prereserved.reserved,
- j->prereserved.remaining,
atomic_read(&c->btree_cache.dirty),
c->btree_cache.used,
atomic_long_read(&c->btree_key_cache.nr_dirty),
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 42504e16acb6cc..a756b69582e349 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -76,14 +76,6 @@ struct journal_res {
u64 seq;
};
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
- unsigned u64s;
-};
-
union journal_res_state {
struct {
atomic64_t counter;
@@ -104,22 +96,6 @@ union journal_res_state {
};
};
-union journal_preres_state {
- struct {
- atomic64_t counter;
- };
-
- struct {
- u64 v;
- };
-
- struct {
- u64 waiting:1,
- reserved:31,
- remaining:32;
- };
-};
-
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
@@ -180,8 +156,6 @@ struct journal {
union journal_res_state reservations;
enum bch_watermark watermark;
- union journal_preres_state prereserved;
-
} __aligned(SMP_CACHE_BYTES);
unsigned long flags;
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index b775cf0fb7cbf2..97790445e67ad2 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
this_cpu_sub(*lock->readers, !ret);
preempt_enable();
- if (!ret && (old & SIX_LOCK_WAITING_write))
- ret = -1 - SIX_LOCK_write;
+ if (!ret) {
+ smp_mb();
+ if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+ ret = -1 - SIX_LOCK_write;
+ }
} else if (type == SIX_LOCK_write && lock->readers) {
if (try) {
atomic_add(SIX_LOCK_HELD_write, &lock->state);
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index 86833445af2056..2d2e66a4e4681e 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -20,7 +20,7 @@ struct snapshot_t {
};
struct snapshot_table {
- struct snapshot_t s[0];
+ DECLARE_FLEX_ARRAY(struct snapshot_t, s);
};
typedef struct {
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 893304a1f06e6e..7857671159b491 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
TRACE_EVENT(journal_reclaim_start,
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
u64 min_nr, u64 min_key_cache,
- u64 prereserved, u64 prereserved_total,
u64 btree_cache_dirty, u64 btree_cache_total,
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
- TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
btree_cache_dirty, btree_cache_total,
btree_key_cache_dirty, btree_key_cache_total),
@@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
__field(bool, kicked )
__field(u64, min_nr )
__field(u64, min_key_cache )
- __field(u64, prereserved )
- __field(u64, prereserved_total )
__field(u64, btree_cache_dirty )
__field(u64, btree_cache_total )
__field(u64, btree_key_cache_dirty )
@@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
__entry->kicked = kicked;
__entry->min_nr = min_nr;
__entry->min_key_cache = min_key_cache;
- __entry->prereserved = prereserved;
- __entry->prereserved_total = prereserved_total;
__entry->btree_cache_dirty = btree_cache_dirty;
__entry->btree_cache_total = btree_cache_total;
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
__entry->btree_key_cache_total = btree_key_cache_total;
),
- TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->direct,
__entry->kicked,
__entry->min_nr,
__entry->min_key_cache,
- __entry->prereserved,
- __entry->prereserved_total,
__entry->btree_cache_dirty,
__entry->btree_cache_total,
__entry->btree_key_cache_dirty,
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index a39ff0c296ecfb..79d982674c1803 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
s.v = v + 1;
s.defined = true;
} else {
+ /*
+ * Check if this option was set on the parent - if so, switched
+ * back to inheriting from the parent:
+ *
+ * rename() also has to deal with keeping inherited options up
+ * to date - see bch2_reinherit_attrs()
+ */
+ spin_lock(&dentry->d_lock);
if (!IS_ROOT(dentry)) {
struct bch_inode_info *dir =
to_bch_ei(d_inode(dentry->d_parent));
@@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
} else {
s.v = 0;
}
+ spin_unlock(&dentry->d_lock);
s.defined = false;
}