diff options
Diffstat (limited to 'fs')
64 files changed, 696 insertions, 328 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 29281b7c38870..0d6138bee2a3d 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -49,9 +49,6 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags, unsigned int s_cache, unsigned int f_flags) { - if (fid->qid.type != P9_QTFILE) - return; - if ((!s_cache) || ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) || (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) { diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index abdbbaee51846..348cc90bf9c56 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -520,6 +520,7 @@ const struct file_operations v9fs_file_operations = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, + .setlease = simple_nosetlease, }; const struct file_operations v9fs_file_operations_dotl = { @@ -534,4 +535,5 @@ const struct file_operations v9fs_file_operations_dotl = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, + .setlease = simple_nosetlease, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b01b1bbf24937..47bd77199e20c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -83,7 +83,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; + res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; @@ -178,6 +178,9 @@ int v9fs_uflags2omode(int uflags, int extended) break; } + if (uflags & O_TRUNC) + ret |= P9_OTRUNC; + if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; @@ -1061,8 +1064,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct v9fs_session_info *v9ses = sb->s_fs_info; struct v9fs_inode *v9inode = V9FS_I(inode); - set_nlink(inode, 1); - inode_set_atime(inode, stat->atime, 0); inode_set_mtime(inode, stat->mtime, 0); inode_set_ctime(inode, stat->mtime, 0); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 4236058c7bbd1..55e67e36ae682 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -244,6 +244,21 @@ done: return res; } +static int v9fs_drop_inode(struct inode *inode) +{ + struct v9fs_session_info *v9ses; + + v9ses = v9fs_inode2v9ses(inode); + if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) + return generic_drop_inode(inode); + /* + * in case of non cached mode always drop the + * inode because we want the inode attribute + * to always match that on the server. + */ + return 1; +} + static int v9fs_write_inode(struct inode *inode, struct writeback_control *wbc) { @@ -268,6 +283,7 @@ static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = simple_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, @@ -278,6 +294,7 @@ static const struct super_operations v9fs_super_ops_dotl = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = v9fs_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 114328acde720..a200442010025 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -49,13 +49,15 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, if (!bch2_dev_exists2(c, bp.k->p.inode)) return 0; + struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode); struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); int ret = 0; - bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || + !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), c, err, - backpointer_pos_wrong, - "backpointer at wrong pos"); + backpointer_bucket_offset_wrong, + "backpointer bucket_offset wrong"); fsck_err: return ret; } @@ -468,7 +470,7 @@ found: goto err; } - bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = p.ptr.offset; bch2_bio_map(bio, data_buf, bytes); ret = submit_bio_wait(bio); diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index da012ca7daee5..85949b9fd880c 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, u64 bucket_offset) { struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); - struct bpos ret; - - ret = POS(bucket.inode, - (bucket_to_sector(ca, bucket.offset) << - MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); + struct bpos ret = POS(bucket.inode, + (bucket_to_sector(ca, bucket.offset) << + MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); - return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index a31a5f706929e..91c3c1fef233d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -709,6 +709,8 @@ struct btree_trans_buf { x(stripe_delete) \ x(reflink) \ x(fallocate) \ + x(fsync) \ + x(dio_write) \ x(discard) \ x(discard_fast) \ x(invalidate) \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 364ae42022af1..f7fbfccd2b1e4 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -578,7 +578,8 @@ struct bch_member { __le64 nbuckets; /* device size */ __le16 first_bucket; /* index of first bucket used */ __le16 bucket_size; /* sectors */ - __le32 pad; + __u8 btree_bitmap_shift; + __u8 pad[3]; __le64 last_mount; /* time_t */ __le64 flags; @@ -587,6 +588,7 @@ struct bch_member { __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; __le64 errors_reset_time; __le64 seq; + __le64 btree_allocated_bitmap; }; #define BCH_MEMBER_V1_BYTES 56 @@ -876,7 +878,8 @@ struct bch_sb_field_downgrade { x(rebalance_work, BCH_VERSION(1, 3)) \ x(member_seq, BCH_VERSION(1, 4)) \ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ - x(btree_subvolume_children, BCH_VERSION(1, 6)) + x(btree_subvolume_children, BCH_VERSION(1, 6)) \ + x(mi_btree_bitmap, BCH_VERSION(1, 7)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1314,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb) x(write_buffer_keys, 11) \ x(datetime, 12) -enum { +enum bch_jset_entry_type { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, BCH_JSET_ENTRY_TYPES() #undef x @@ -1360,7 +1363,7 @@ struct jset_entry_blacklist_v2 { x(inodes, 1) \ x(key_version, 2) -enum { +enum bch_fs_usage_type { #define x(f, nr) BCH_FS_USAGE_##f = nr, BCH_FS_USAGE_TYPES() #undef x @@ -1501,7 +1504,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ BIT_ULL(KEY_TYPE_reflink_v)| \ - BIT_ULL(KEY_TYPE_indirect_inline_data)) \ + BIT_ULL(KEY_TYPE_indirect_inline_data)| \ + BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index cf23ff47bed8b..3a45d128f608d 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, return bkey_packed(k) ? format->key_u64s : BKEY_U64s; } +static inline bool bkeyp_u64s_valid(const struct bkey_format *f, + const struct bkey_packed *k) +{ + return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s); +} + static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, const struct bkey_packed *k) { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5e52684764eb1..db336a43fc083 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && + bkey_fsck_err_on((type == BKEY_TYPE_btree || + (flags & BKEY_INVALID_COMMIT)) && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", - bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); + bch2_btree_node_type_str(type), + k.k->type < KEY_TYPE_MAX + ? bch2_bkey_types[k.k->type] + : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { bkey_fsck_err_on(k.k->size == 0, c, err, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 84474324dba9b..02c70e813face 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; - u32 seq; - BUG_ON(level + 1 >= BTREE_MAX_DEPTH); + if (unlikely(level >= BTREE_MAX_DEPTH)) { + int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", + level, BTREE_MAX_DEPTH); + return ERR_PTR(ret); + } + + if (unlikely(!bkey_is_btree_ptr(&k->k))) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); + printbuf_exit(&buf); + return ERR_PTR(ret); + } + + if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); + printbuf_exit(&buf); + return ERR_PTR(ret); + } + /* * Parent node must be locked, else we could read in a btree node that's * been freed: @@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, } set_btree_node_read_in_flight(b); - six_unlock_write(&b->c.lock); - seq = six_lock_seq(&b->c.lock); - six_unlock_intent(&b->c.lock); - /* Unlock before doing IO: */ - if (path && sync) - bch2_trans_unlock_noassert(trans); - - bch2_btree_node_read(trans, b, sync); + if (path) { + u32 seq = six_lock_seq(&b->c.lock); - if (!sync) - return NULL; + /* Unlock before doing IO: */ + six_unlock_intent(&b->c.lock); + bch2_trans_unlock_noassert(trans); - if (path) { - int ret = bch2_trans_relock(trans) ?: - bch2_btree_path_relock_intent(trans, path); - if (ret) { - BUG_ON(!trans->restarted); - return ERR_PTR(ret); - } - } + bch2_btree_node_read(trans, b, sync); - if (!six_relock_type(&b->c.lock, lock_type, seq)) { - BUG_ON(!path); + if (!sync) + return NULL; - trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); + if (!six_relock_type(&b->c.lock, lock_type, seq)) + b = NULL; + } else { + bch2_btree_node_read(trans, b, sync); + if (lock_type == SIX_LOCK_read) + six_lock_downgrade(&b->c.lock); } return b; @@ -1112,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; - struct btree *b; BUG_ON(path && !btree_node_locked(path, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); - b = btree_cache_find(bc, k); + struct btree *b = btree_cache_find(bc, k); if (b) return 0; b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); - return PTR_ERR_OR_ZERO(b); + if (!IS_ERR_OR_NULL(b)) + six_unlock_read(&b->c.lock); + return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) @@ -1148,6 +1163,8 @@ wait_on_io: btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); + if (unlikely(b->hash_val != btree_ptr_hash_val(k))) + goto out; if (btree_node_dirty(b)) { __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); @@ -1162,7 +1179,7 @@ wait_on_io: btree_node_data_free(c, b); bch2_btree_node_hash_remove(bc, b); mutex_unlock(&bc->lock); - +out: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index d2555da55c6da..791470b0c6545 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -828,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct bch_fs *c = trans->c; struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; + struct printbuf buf = PRINTBUF; int ret = 0; deleted.p = k->k->p; @@ -848,11 +849,23 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, if (ret) goto err; + if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k), + c, btree_bitmap_not_marked, + "btree ptr not marked in member info btree allocated bitmap\n %s", + (bch2_bkey_val_to_text(&buf, c, *k), + buf.buf))) { + mutex_lock(&c->sb_lock); + bch2_dev_btree_bitmap_mark(c, *k); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + ret = commit_do(trans, NULL, NULL, 0, bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: + printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -1574,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - return ret; + goto out; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -1582,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } +out: fsck_err: printbuf_exit(&buf); return ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d7de82ac38935..debb0edc3455a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -831,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); } -static bool __bkey_valid(struct bch_fs *c, struct btree *b, +static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, struct bset *i, struct bkey_packed *k) { if (bkey_p_next(k) > vstruct_last(i)) @@ -840,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b, if (k->format > KEY_FORMAT_CURRENT) return false; - if (k->u64s < bkeyp_key_u64s(&b->format, k)) + if (!bkeyp_u64s_valid(&b->format, k)) return false; struct printbuf buf = PRINTBUF; @@ -884,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, "invalid bkey format %u", k->format)) goto drop_this_key; - if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k), + if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_u64s, - "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k))) + "bad k->u64s %u (min %u max %zu)", k->u64s, + bkeyp_key_u64s(&b->format, k), + U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; if (!write) @@ -947,13 +949,12 @@ drop_this_key: * do */ - if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { + if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { for (next_good_key = 1; next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; next_good_key++) - if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) + if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) goto got_good_key; - } /* @@ -1339,7 +1340,9 @@ start: rb->start_time); bio_put(&rb->bio); - if (saw_error && !btree_node_read_error(b)) { + if (saw_error && + !btree_node_read_error(b) && + c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 1d58d447b386c..1c70836dd7cce 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter) { struct btree_trans *trans = iter->trans; - if (!trans->restarted) - btree_iter_path(trans, iter)->preserve = false; + if (!iter->path || trans->restarted) + return; + + struct btree_path *path = btree_iter_path(trans, iter); + path->preserve = false; + if (path->ref == 1) + path->should_be_locked = false; } void *__bch2_trans_kmalloc(struct btree_trans *, size_t); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 88a3582a32757..e8c1c530cd95f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ - scanned += bc->nr_freed_nonpcpu; - list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_nonpcpu--; } - if (scanned >= nr) - goto out; - - scanned += bc->nr_freed_pcpu; - list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_pcpu--; } - if (scanned >= nr) - goto out; - rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) @@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); - if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; - - if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) + } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); - else if (bkey_cached_lock_for_evict(ck)) { + goto next; + } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } @@ -916,7 +906,6 @@ next: } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); -out: memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 556f76f5c84e1..c60794264da28 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -133,9 +133,19 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (le64_to_cpu(bn->magic) != bset_magic(c)) return; + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { + struct nonce nonce = btree_nonce(&bn->keys, 0); + unsigned bytes = (void *) &bn->keys - (void *) &bn->flags; + + bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes); + } + if (btree_id_is_alloc(BTREE_NODE_ID(bn))) return; + if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) + return; + rcu_read_lock(); struct found_btree_node n = { .btree_id = BTREE_NODE_ID(bn), @@ -195,8 +205,13 @@ static int read_btree_nodes_worker(void *p) last_print = jiffies; } - try_read_btree_node(w->f, ca, bio, buf, - bucket * ca->mi.bucket_size + bucket_offset); + u64 sector = bucket * ca->mi.bucket_size + bucket_offset; + + if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap && + !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) + continue; + + try_read_btree_node(w->f, ca, bio, buf, sector); } err: bio_put(bio); @@ -287,6 +302,8 @@ again: start->max_key = bpos_predecessor(n->min_key); start->range_updated = true; + } else if (n->level) { + n->overwritten = true; } else { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index aa9da49707404..bbec91e8e6506 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags struct bkey_cached *ck = (void *) path->l[0].b; unsigned new_u64s; struct bkey_i *new_k; + unsigned watermark = flags & BCH_WATERMARK_MASK; EBUG_ON(path->level); - if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && - bch2_btree_key_cache_must_wait(c) && - !(flags & BCH_TRANS_COMMIT_journal_reclaim)) + if (watermark < BCH_WATERMARK_reclaim && + !test_bit(BKEY_CACHED_DIRTY, &ck->flags) && + bch2_btree_key_cache_must_wait(c)) return -BCH_ERR_btree_insert_need_journal_reclaim; /* @@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ } static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, - struct btree_insert_entry *btree_id_start) + unsigned btree_id_start) { - struct btree_insert_entry *i; bool trans_trigger_run; int ret, overwrite; @@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, do { trans_trigger_run = false; - for (i = btree_id_start; - i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; + for (unsigned i = btree_id_start; + i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; i++) { - if (i->btree_id != btree_id) + if (trans->updates[i].btree_id != btree_id) continue; - ret = run_one_trans_trigger(trans, i, overwrite); + ret = run_one_trans_trigger(trans, trans->updates + i, overwrite); if (ret < 0) return ret; if (ret) @@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, static int bch2_trans_commit_run_triggers(struct btree_trans *trans) { - struct btree_insert_entry *btree_id_start = trans->updates; - unsigned btree_id = 0; + unsigned btree_id = 0, btree_id_start = 0; int ret = 0; /* @@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) if (btree_id == BTREE_ID_alloc) continue; - while (btree_id_start < trans->updates + trans->nr_updates && - btree_id_start->btree_id < btree_id) + while (btree_id_start < trans->nr_updates && + trans->updates[btree_id_start].btree_id < btree_id) btree_id_start++; ret = run_btree_triggers(trans, btree_id, btree_id_start); @@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) return ret; } - trans_for_each_update(trans, i) { + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { + struct btree_insert_entry *i = trans->updates + idx; + if (i->btree_id > BTREE_ID_alloc) break; if (i->btree_id == BTREE_ID_alloc) { - ret = run_btree_triggers(trans, BTREE_ID_alloc, i); + ret = run_btree_triggers(trans, BTREE_ID_alloc, idx); if (ret) return ret; break; @@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags struct bch_fs *c = trans->c; int ret = 0, u64s_delta = 0; - trans_for_each_update(trans, i) { + for (unsigned idx = 0; idx < trans->nr_updates; idx++) { + struct btree_insert_entry *i = trans->updates + idx; if (i->cached) continue; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e0c982a4195c7..c69b233c41bb3 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -321,9 +321,9 @@ struct bkey_cached { struct btree_bkey_cached_common c; unsigned long flags; + unsigned long btree_trans_barrier_seq; u16 u64s; bool valid; - u32 btree_trans_barrier_seq; struct bkey_cached_key key; struct rhash_head hash; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c4a5e83a56a43..b4efd8cc4d1a2 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -21,6 +21,7 @@ #include "keylist.h" #include "recovery_passes.h" #include "replicas.h" +#include "sb-members.h" #include "super-io.h" #include "trace.h" @@ -605,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as, bch2_keylist_push(keys); } +static bool btree_update_new_nodes_marked_sb(struct btree_update *as) +{ + for_each_keylist_key(&as->new_keys, k) + if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k))) + return false; + return true; +} + +static void btree_update_new_nodes_mark_sb(struct btree_update *as) +{ + struct bch_fs *c = as->c; + + mutex_lock(&c->sb_lock); + for_each_keylist_key(&as->new_keys, k) + bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} + /* * The transactional part of an interior btree node update, where we journal the * update we did to the interior node and update alloc info: @@ -662,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as) if (ret) goto err; + if (!btree_update_new_nodes_marked_sb(as)) + btree_update_new_nodes_mark_sb(as); + /* * Wait for any in flight writes to finish before we free the old nodes * on disk: @@ -1280,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) bch2_recalc_btree_reserve(c); } -static void bch2_btree_set_root(struct btree_update *as, - struct btree_trans *trans, - struct btree_path *path, - struct btree *b) +static int bch2_btree_set_root(struct btree_update *as, + struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + bool nofail) { struct bch_fs *c = as->c; - struct btree *old; trace_and_count(c, btree_node_set_root, trans, b); - old = btree_node_root(c, b); + struct btree *old = btree_node_root(c, b); /* * Ensure no one is using the old root while we switch to the * new root: */ - bch2_btree_node_lock_write_nofail(trans, path, &old->c); + if (nofail) { + bch2_btree_node_lock_write_nofail(trans, path, &old->c); + } else { + int ret = bch2_btree_node_lock_write(trans, path, &old->c); + if (ret) + return ret; + } bch2_btree_set_root_inmem(c, b); @@ -1310,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as, * depend on the new root would have to update the new root. */ bch2_btree_node_unlock_write(trans, path, old); + return 0; } /* Interior node updates: */ @@ -1652,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, if (parent) { /* Split a non root node */ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys); - if (ret) - goto err; } else if (n3) { - bch2_btree_set_root(as, trans, trans->paths + path, n3); + ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, trans, trans->paths + path, n1); + ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false); } + if (ret) + goto err; + if (n3) { bch2_btree_update_get_open_buckets(as, n3); bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); @@ -1863,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * bch2_keylist_add(&as->parent_keys, &b->key); btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys); - bch2_btree_set_root(as, trans, path, n); + int ret = bch2_btree_set_root(as, trans, path, n, true); + BUG_ON(ret); + bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); bch2_trans_node_add(trans, path, n); @@ -1916,6 +1950,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, BUG_ON(!trans->paths[path].should_be_locked); BUG_ON(!btree_node_locked(&trans->paths[path], level)); + /* + * Work around a deadlock caused by the btree write buffer not doing + * merges and leaving tons of merges for us to do - we really don't need + * to be doing merges at all from the interior update path, and if the + * interior update path is generating too many new interior updates we + * deadlock: + */ + if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) + return 0; + + if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) { + flags &= ~BCH_WATERMARK_MASK; + flags |= BCH_WATERMARK_btree; + flags |= BCH_TRANS_COMMIT_journal_reclaim; + } + b = trans->paths[path].l[level].b; if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) || @@ -2061,6 +2111,10 @@ err: bch2_path_put(trans, new_path, true); bch2_path_put(trans, sib_path, true); bch2_trans_verify_locks(trans); + if (ret == -BCH_ERR_journal_reclaim_would_deadlock) + ret = 0; + if (!ret) + ret = bch2_trans_relock(trans); return ret; err_free_update: bch2_btree_node_free_never_used(as, trans, n); @@ -2106,12 +2160,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys); - if (ret) - goto err; } else { - bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n); + ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false); } + if (ret) + goto err; + bch2_btree_update_get_open_buckets(as, n); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index baf63e2fddb64..36a6f42aba5e6 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -316,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) { bch2_btree_node_unlock_write(trans, path, path->l[0].b); write_locked = false; + + ret = lockrestart_do(trans, + bch2_btree_iter_traverse(&iter) ?: + bch2_foreground_maybe_merge(trans, iter.path, 0, + BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_journal_reclaim| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc)); + if (ret) + goto err; } } @@ -382,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) ret = commit_do(trans, NULL, NULL, BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim, + BCH_TRANS_COMMIT_no_journal_res , btree_write_buffered_insert(trans, i)); if (ret) goto err; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 00aaf4bb51397..f9af5adabe836 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -395,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type) : "(invalid data type)"; } -static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type) -{ - if (type < BCH_DATA_NR) - prt_str(out, __bch2_data_types[type]); - else - prt_printf(out, "(invalid data type %u)", type); -} - /* disk reservations: */ static inline void bch2_disk_reservation_put(struct bch_fs *c, diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 72781aad6ba70..4d14f19f51850 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a /* We need request_key() to be called before we punt to kthread: */ opt_set(thr->opts, nostart, true); + bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) thr->c->opts.errors = BCH_ON_ERROR_ro; - ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); + ret = __bch2_run_thread_with_stdio(&thr->thr); out: darray_for_each(devs, i) kfree(*i); diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 4701457f6381c..7ed779b411f61 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, extent_nonce(version, crc_old), bio); if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { - bch_err(c, "checksum error in %s() (memory corruption or bug?)\n" - "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)", - __func__, - crc_old.csum.hi, - crc_old.csum.lo, - merged.hi, - merged.lo, - bch2_csum_types[crc_old.csum_type], - bch2_csum_types[new_csum_type]); + struct printbuf buf = PRINTBUF; + prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" + "expected %0llx:%0llx got %0llx:%0llx (old type ", + __func__, + crc_old.csum.hi, + crc_old.csum.lo, + merged.hi, + merged.lo); + bch2_prt_csum_type(&buf, crc_old.csum_type); + prt_str(&buf, " new type "); + bch2_prt_csum_type(&buf, new_csum_type); + prt_str(&buf, ")"); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); return -EIO; } diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 1b8c2c1016dc6..e40499fde9a40 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out, struct bch_csum expected, struct bch_csum got) { - prt_printf(out, "checksum error: got "); + prt_str(out, "checksum error, type "); + bch2_prt_csum_type(out, type); + prt_str(out, ": got "); bch2_csum_to_text(out, type, got); prt_str(out, " should be "); bch2_csum_to_text(out, type, expected); - prt_printf(out, " type %s", bch2_csum_types[type]); } int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 58c2eb45570ff..607fd5e232c90 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } -static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type) -{ - if (type < BCH_COMPRESSION_TYPE_NR) - prt_str(out, __bch2_compression_types[type]); - else - prt_printf(out, "(invalid compression type %u)", type); -} - int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, struct bch_extent_crc_unpacked *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 082075244e16a..556a217108d32 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -131,29 +131,33 @@ fsck_err: void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - unsigned i, nr_data = s->nr_blocks - s->nr_redundant; + const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v; + struct bch_stripe s = {}; + + memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k))); + + unsigned nr_data = s.nr_blocks - s.nr_redundant; + + prt_printf(out, "algo %u sectors %u blocks %u:%u csum ", + s.algorithm, + le16_to_cpu(s.sectors), + nr_data, + s.nr_redundant); + bch2_prt_csum_type(out, s.csum_type); + prt_printf(out, " gran %u", 1U << s.csum_granularity_bits); + + for (unsigned i = 0; i < s.nr_blocks; i++) { + const struct bch_extent_ptr *ptr = sp->ptrs + i; + + if ((void *) ptr >= bkey_val_end(k)) + break; + + bch2_extent_ptr_to_text(out, c, ptr); - prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", - s->algorithm, - le16_to_cpu(s->sectors), - nr_data, - s->nr_redundant, - s->csum_type, - 1U << s->csum_granularity_bits); - - for (i = 0; i < s->nr_blocks; i++) { - const struct bch_extent_ptr *ptr = s->ptrs + i; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - u32 offset; - u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - - prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); - if (i < nr_data) - prt_printf(out, "#%u", stripe_blockcount_get(s, i)); - prt_printf(out, " gen %u", ptr->gen); - if (ptr_stale(ca, ptr)) - prt_printf(out, " stale"); + if (s.csum_type < BCH_CSUM_NR && + i < nr_data && + stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k)) + prt_printf(out, "#%u", stripe_blockcount_get(sp, i)); } } @@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct printbuf err = PRINTBUF; struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); - prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", - want.hi, want.lo, - got.hi, got.lo, - bch2_csum_types[v->csum_type]); + prt_str(&err, "stripe "); + bch2_csum_err_msg(&err, v->csum_type, want, got); prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); bch_err_ratelimited(ca, "%s", err.buf); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index f4369b02e805f..f042616888b0a 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) static inline unsigned stripe_csum_offset(const struct bch_stripe *s, unsigned dev, unsigned csum_idx) { + EBUG_ON(s->csum_type >= BCH_CSUM_NR); + unsigned csum_bytes = bch_crc_bytes[s->csum_type]; return sizeof(struct bch_stripe) + diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 0e3ca99fbd2de..1a331e5392048 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -998,7 +998,9 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_str(out, " cached"); if (ptr->unwritten) prt_str(out, " unwritten"); - if (ca && ptr_stale(ca, ptr)) + if (b >= ca->mi.first_bucket && + b < ca->mi.nbuckets && + ptr_stale(ca, ptr)) prt_printf(out, " stale"); } } @@ -1028,11 +1030,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ", + prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ", crc.compressed_size, crc.uncompressed_size, - crc.offset, crc.nonce, - bch2_csum_types[crc.csum_type]); + crc.offset, crc.nonce); + bch2_prt_csum_type(out, crc.csum_type); + prt_str(out, " compress "); bch2_prt_compression_type(out, crc.compression_type); break; } diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index f49e6c0f0f683..b889370a50881 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) ret = dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); + bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write); + /* inode->i_dio_count is our ref on inode and thus bch_fs */ inode_dio_end(&inode->v); @@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) prefetch(&inode->ei_inode); prefetch((void *) &inode->ei_inode + 64); + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write)) + return -EROFS; + inode_lock(&inode->v); ret = generic_write_checks(req, iter); if (unlikely(ret <= 0)) - goto err; + goto err_put_write_ref; ret = file_remove_privs(file); if (unlikely(ret)) - goto err; + goto err_put_write_ref; ret = file_update_time(file); if (unlikely(ret)) - goto err; + goto err_put_write_ref; if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) - goto err; + goto err_put_write_ref; inode_dio_begin(&inode->v); bch2_pagecache_block_get(inode); @@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) } ret = bch2_dio_write_loop(dio); -err: +out: if (locked) inode_unlock(&inode->v); return ret; @@ -653,7 +658,9 @@ err_put_bio: bch2_pagecache_block_put(inode); bio_put(bio); inode_dio_end(&inode->v); - goto err; +err_put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_dio_write); + goto out; } void bch2_fs_fs_io_direct_exit(struct bch_fs *c) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 8c70123b6a0c8..20b40477425f4 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_flush_inode(struct bch_fs *c, struct bch_inode_info *inode) { - struct bch_inode_unpacked u; - int ret; - if (c->opts.journal_flush_disabled) return 0; - ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u); - if (ret) - return ret; + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) + return -EROFS; - return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: - bch2_inode_flush_nocow_writes(c, inode); + struct bch_inode_unpacked u; + int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: + bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: + bch2_inode_flush_nocow_writes(c, inode); + bch2_write_ref_put(c, BCH_WRITE_REF_fsync); + return ret; } int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5ea9fa1259d1..fce690007edfc 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - discard_new_inode(&inode->v); + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) if (unlikely(!inode)) { int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM); - if (ret && inode) - discard_new_inode(&inode->v); + if (ret && inode) { + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); + } if (ret) return ERR_PTR(ret); } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 725fcf46f6312..eb1f9d6f5a196 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out, if (entry) { prt_str(out, " type="); - prt_str(out, bch2_jset_entry_types[entry->type]); + bch2_prt_jset_entry_type(out, entry->type); } if (!jset) { @@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs jset_entry_for_each_key(entry, k) { if (!first) { prt_newline(out); - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); + bch2_prt_jset_entry_type(out, entry->type); + prt_str(out, ": "); } prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); @@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry_usage *u = container_of(entry, struct jset_entry_usage, entry); - prt_printf(out, "type=%s v=%llu", - bch2_fs_usage_types[u->entry.btree_id], - le64_to_cpu(u->v)); + prt_str(out, "type="); + bch2_prt_fs_usage_type(out, u->entry.btree_id); + prt_printf(out, " v=%llu", le64_to_cpu(u->v)); } static int journal_entry_data_usage_validate(struct bch_fs *c, @@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c, void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry *entry) { + bch2_prt_jset_entry_type(out, entry->type); + if (entry->type < BCH_JSET_ENTRY_NR) { - prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); + prt_str(out, ": "); bch2_jset_entry_ops[entry->type].to_text(out, c, entry); - } else { - prt_printf(out, "(unknown type %u)", entry->type); } } @@ -1722,7 +1723,7 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static CLOSURE_CALLBACK(do_journal_write) +static CLOSURE_CALLBACK(journal_write_submit) { closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); @@ -1767,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write) continue_at(cl, journal_write_done, j->wq); } +static CLOSURE_CALLBACK(journal_write_preflush) +{ + closure_type(w, struct journal_buf, io); + struct journal *j = container_of(w, struct journal, buf[w->idx]); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + spin_lock(&j->lock); + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + + continue_at(cl, journal_write_preflush, j->wq); + return; + } + + if (w->separate_flush) { + for_each_rw_member(c, ca) { + percpu_ref_get(&ca->io_ref); + + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio[w->idx]->bio; + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); + bio->bi_end_io = journal_write_endio; + bio->bi_private = ca; + closure_bio_submit(bio, cl); + } + + continue_at(cl, journal_write_submit, j->wq); + } else { + /* + * no need to punt to another work item if we're not waiting on + * preflushes + */ + journal_write_submit(&cl->work); + } +} + static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -2032,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write) goto err; if (!JSET_NO_FLUSH(w->data)) - closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq)); - - if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); - - struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio[w->idx]->bio; - bio_reset(bio, ca->disk_sb.bdev, - REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; - closure_bio_submit(bio, cl); - } - } - - continue_at(cl, do_journal_write, j->wq); + continue_at(cl, journal_write_preflush, j->wq); + else + continue_at(cl, journal_write_submit, j->wq); return; no_io: continue_at(cl, journal_write_done, j->wq); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index e1800c4119b5f..bb068fd724656 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -43,7 +43,7 @@ const char * const __bch2_btree_ids[] = { NULL }; -const char * const bch2_csum_types[] = { +static const char * const __bch2_csum_types[] = { BCH_CSUM_TYPES() NULL }; @@ -53,7 +53,7 @@ const char * const bch2_csum_opts[] = { NULL }; -const char * const __bch2_compression_types[] = { +static const char * const __bch2_compression_types[] = { BCH_COMPRESSION_TYPES() NULL }; @@ -83,18 +83,39 @@ const char * const bch2_member_states[] = { NULL }; -const char * const bch2_jset_entry_types[] = { +static const char * const __bch2_jset_entry_types[] = { BCH_JSET_ENTRY_TYPES() NULL }; -const char * const bch2_fs_usage_types[] = { +static const char * const __bch2_fs_usage_types[] = { BCH_FS_USAGE_TYPES() NULL }; #undef x +static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[], + unsigned nr, const char *type, unsigned idx) +{ + if (idx < nr) + prt_str(out, opts[idx]); + else + prt_printf(out, "(unknown %s %u)", type, idx); +} + +#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \ +void bch2_prt_##name(struct printbuf *out, type t) \ +{ \ + prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\ +} + +PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type); +PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type); +PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); +PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); +PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); + static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, struct printbuf *err) { diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 1ac4135cca1c3..84e452835a17d 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; extern const char * const __bch2_btree_ids[]; -extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; -extern const char * const __bch2_compression_types[]; extern const char * const bch2_compression_opts[]; extern const char * const bch2_str_hash_types[]; extern const char * const bch2_str_hash_opts[]; extern const char * const __bch2_data_types[]; extern const char * const bch2_member_states[]; -extern const char * const bch2_jset_entry_types[]; -extern const char * const bch2_fs_usage_types[]; extern const char * const bch2_d_types[]; +void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type); +void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type); +void bch2_prt_data_type(struct printbuf *, enum bch_data_type); +void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); +void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); + static inline const char *bch2_d_type_str(unsigned d_type) { return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)"; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0f328aba9760b..be5b476193270 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - replay_now_at(j, k->journal_seq); + if (k->journal_seq) + replay_now_at(j, k->journal_seq); + else + replay_now_at(j, j->replay_journal_seq_end); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc| diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index cb501460d6152..0cec0f7d97035 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -44,7 +44,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || c->opts.fsck || !c->sb.clean) + if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit) return bch2_fs_read_write_early(c); return 0; } diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 5980ba2563fe9..35ca3f138de6f 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if (vstruct_end(entry) > vstruct_end(&clean->field)) { + bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu", + le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s), + (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field)); + bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun); + return -BCH_ERR_fsck_repair_unimplemented; + } + ret = bch2_journal_entry_validate(c, NULL, entry, le16_to_cpu(c->disk_sb.sb->version), BCH_SB_BIG_ENDIAN(c->disk_sb.sb), diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index d6f81179c3a29..a98ef940b7a32 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -51,7 +51,10 @@ BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \ x(btree_subvolume_children, \ BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \ - BCH_FSCK_ERR_subvol_children_not_set) + BCH_FSCK_ERR_subvol_children_not_set) \ + x(mi_btree_bitmap, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_btree_bitmap_not_marked) #define DOWNGRADE_TABLE() diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index d7d609131030a..06c7a644f4a44 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -130,7 +130,7 @@ x(bucket_gens_nonzero_for_invalid_buckets, 122) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ x(need_discard_freespace_key_bad, 124) \ - x(backpointer_pos_wrong, 125) \ + x(backpointer_bucket_offset_wrong, 125) \ x(backpointer_to_missing_device, 126) \ x(backpointer_to_missing_alloc, 127) \ x(backpointer_to_missing_ptr, 128) \ @@ -270,7 +270,9 @@ x(btree_ptr_v2_min_key_bad, 262) \ x(btree_root_unreadable_and_scan_found_nothing, 263) \ x(snapshot_node_missing, 264) \ - x(dup_backpointer_to_bad_csum_extent, 265) + x(dup_backpointer_to_bad_csum_extent, 265) \ + x(btree_bitmap_not_marked, 266) \ + x(sb_clean_entry_overrun, 267) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index eff5ce18c69c0..5b8e621ac5eb5 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "btree_cache.h" #include "disk_groups.h" #include "opts.h" #include "replicas.h" @@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca) bch2_write_super(c); mutex_unlock(&c->sb_lock); } + +/* + * Per member "range has btree nodes" bitmap: + * + * This is so that if we ever have to run the btree node scan to repair we don't + * have to scan full devices: + */ + +bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) +{ + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) + if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev), + ptr->offset, btree_sectors(c))) + return false; + return true; +} + +static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, + u64 start, unsigned sectors) +{ + struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); + u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); + + u64 end = start + sectors; + + int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); + if (resize > 0) { + u64 new_bitmap = 0; + + for (unsigned i = 0; i < 64; i++) + if (bitmap & BIT_ULL(i)) + new_bitmap |= BIT_ULL(i >> resize); + bitmap = new_bitmap; + m->btree_bitmap_shift += resize; + } + + for (unsigned bit = start >> m->btree_bitmap_shift; + (u64) bit << m->btree_bitmap_shift < end; + bit++) + bitmap |= BIT_ULL(bit); + + m->btree_allocated_bitmap = cpu_to_le64(bitmap); +} + +void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) + __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index be0a941832715..5efa64eca5f85 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -3,6 +3,7 @@ #define _BCACHEFS_SB_MEMBERS_H #include "darray.h" +#include "bkey_types.h" extern char * const bch2_member_error_strs[]; @@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) : 1, .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), .valid = bch2_member_exists(mi), + .btree_bitmap_shift = mi->btree_bitmap_shift, + .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), }; } @@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *); void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); void bch2_dev_errors_reset(struct bch_dev *); +static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors) +{ + u64 end = start + sectors; + + if (end > 64ULL << ca->mi.btree_bitmap_shift) + return false; + + for (unsigned bit = start >> ca->mi.btree_bitmap_shift; + (u64) bit << ca->mi.btree_bitmap_shift < end; + bit++) + if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) + return false; + return true; +} + +bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); +void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); + #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 5eee055ee2721..08ea3dbbbe97c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -700,8 +700,11 @@ retry: return -ENOMEM; sb->sb_name = kstrdup(path, GFP_KERNEL); - if (!sb->sb_name) - return -ENOMEM; + if (!sb->sb_name) { + ret = -ENOMEM; + prt_printf(&err, "error allocating memory for sb_name"); + goto err; + } #ifndef __KERNEL__ if (opt_get(*opts, direct_io) == false) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8daf80a38d60c..88e214c609bb2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index ec784d975f665..11bcef170c2c2 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -37,6 +37,8 @@ struct bch_member_cpu { u8 durability; u8 freespace_initialized; u8 valid; + u8 btree_bitmap_shift; + u64 btree_allocated_bitmap; }; #endif /* _BCACHEFS_SUPER_TYPES_H */ diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index b18b0cc81b594..5be92fe3f4ea4 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -25,6 +25,7 @@ #include "ec.h" #include "inode.h" #include "journal.h" +#include "journal_reclaim.h" #include "keylist.h" #include "move.h" #include "movinggc.h" @@ -138,6 +139,7 @@ do { \ write_attribute(trigger_gc); write_attribute(trigger_discards); write_attribute(trigger_invalidates); +write_attribute(trigger_journal_flush); write_attribute(prune_cache); write_attribute(btree_wakeup); rw_attribute(btree_gc_periodic); @@ -500,7 +502,7 @@ STORE(bch2_fs) /* Debugging: */ - if (!test_bit(BCH_FS_rw, &c->flags)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)) return -EROFS; if (attr == &sysfs_prune_cache) { @@ -533,6 +535,11 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_invalidates) bch2_do_invalidates(c); + if (attr == &sysfs_trigger_journal_flush) { + bch2_journal_flush_all_pins(&c->journal); + bch2_journal_meta(&c->journal); + } + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -553,6 +560,7 @@ STORE(bch2_fs) size = ret; } #endif + bch2_write_ref_put(c, BCH_WRITE_REF_sysfs); return size; } SYSFS_OPS(bch2_fs); @@ -651,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_gc, &sysfs_trigger_discards, &sysfs_trigger_invalidates, + &sysfs_trigger_journal_flush, &sysfs_prune_cache, &sysfs_btree_wakeup, diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 940db15d6a939..b1af7ac430f66 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg) return 0; } -int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, - const struct thread_with_stdio_ops *ops) +void bch2_thread_with_stdio_init(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) { stdio_buf_init(&thr->stdio.input); stdio_buf_init(&thr->stdio.output); thr->ops = ops; +} +int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr) +{ return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn); } +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) +{ + bch2_thread_with_stdio_init(thr, ops); + + return __bch2_run_thread_with_stdio(thr); +} + int bch2_run_thread_with_stdout(struct thread_with_stdio *thr, const struct thread_with_stdio_ops *ops) { diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h index af54ea8f5b0ff..1d63d14d7dcae 100644 --- a/fs/bcachefs/thread_with_file.h +++ b/fs/bcachefs/thread_with_file.h @@ -63,6 +63,9 @@ struct thread_with_stdio { const struct thread_with_stdio_ops *ops; }; +void bch2_thread_with_stdio_init(struct thread_with_stdio *, + const struct thread_with_stdio_ops *); +int __bch2_run_thread_with_stdio(struct thread_with_stdio *); int bch2_run_thread_with_stdio(struct thread_with_stdio *, const struct thread_with_stdio_ops *); int bch2_run_thread_with_stdout(struct thread_with_stdio *, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index beedd6ed64d39..257d044bca915 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (root_id != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_ref generic_ref = { 0 }; + /* + * Assert that the extent buffer is not cleared due to + * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer + * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for + * detail. + */ + ASSERT(btrfs_header_bytenr(buf) != 0); + btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent, btrfs_header_owner(buf)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 61594eaf1f896..2776112dbdf8d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, gfp_t extra_gfp) { + const gfp_t gfp = GFP_NOFS | extra_gfp; unsigned int allocated; for (allocated = 0; allocated < nr_pages;) { unsigned int last = allocated; - allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp, - nr_pages, page_array); - - if (allocated == nr_pages) - return 0; - - /* - * During this iteration, no page could be allocated, even - * though alloc_pages_bulk_array() falls back to alloc_page() - * if it could not bulk-allocate. So we must be out of memory. - */ - if (allocated == last) { + allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array); + if (unlikely(allocated == last)) { + /* No progress, fail and do cleanup. */ for (int i = 0; i < allocated; i++) { __free_page(page_array[i]); page_array[i] = NULL; } return -ENOMEM; } - - memalloc_retry_wait(GFP_NOFS); } return 0; } @@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, * The actual zeroout of the buffer will happen later in * btree_csum_one_bio. */ - if (btrfs_is_zoned(fs_info)) { + if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags); return; } @@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb) num_folios = num_extent_folios(eb); WARN_ON(atomic_read(&eb->refs) == 0); WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); + WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)); if (!was_dirty) { bool subpage = eb->fs_info->nodesize < PAGE_SIZE; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b6cad106c37e4..0b2da7b7e2ad0 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -310,6 +310,10 @@ struct cuse_init_args { /** * cuse_process_init_reply - finish initializing CUSE channel * + * @fm: The fuse mount information containing the CUSE connection. + * @args: The arguments passed to the init reply. + * @error: The error code signifying if any error occurred during the process. + * * This function creates the character device and sets up all the * required data structures for it. Please read the comment at the * top of this file for high level overview. diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4a6df591add61..2b0d4781f3948 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1321,6 +1321,7 @@ retry: err = fuse_do_statx(inode, file, stat); if (err == -ENOSYS) { fc->no_statx = 1; + err = 0; goto retry; } } else { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a56e7bffd0004..b57ce41576407 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1362,7 +1362,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, bool *exclusive) { struct inode *inode = file_inode(iocb->ki_filp); - struct fuse_file *ff = iocb->ki_filp->private_data; + struct fuse_inode *fi = get_fuse_inode(inode); *exclusive = fuse_dio_wr_exclusive_lock(iocb, from); if (*exclusive) { @@ -1377,7 +1377,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, * have raced, so check it again. */ if (fuse_io_past_eof(iocb, from) || - fuse_file_uncached_io_start(inode, ff, NULL) != 0) { + fuse_inode_uncached_io_start(fi, NULL) != 0) { inode_unlock_shared(inode); inode_lock(inode); *exclusive = true; @@ -1388,13 +1388,13 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from, static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive) { struct inode *inode = file_inode(iocb->ki_filp); - struct fuse_file *ff = iocb->ki_filp->private_data; + struct fuse_inode *fi = get_fuse_inode(inode); if (exclusive) { inode_unlock(inode); } else { /* Allow opens in caching mode after last parallel dio end */ - fuse_file_uncached_io_end(inode, ff); + fuse_inode_uncached_io_end(fi); inode_unlock_shared(inode); } } @@ -2574,8 +2574,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) * First mmap of direct_io file enters caching inode io mode. * Also waits for parallel dio writers to go into serial mode * (exclusive instead of shared lock). + * After first mmap, the inode stays in caching io mode until + * the direct_io file release. */ - rc = fuse_file_cached_io_start(inode, ff); + rc = fuse_file_cached_io_open(inode, ff); if (rc) return rc; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b24084b60864e..f239196103137 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1394,9 +1394,10 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* iomode.c */ -int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff); -int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb); -void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff); +int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff); +int fuse_inode_uncached_io_start(struct fuse_inode *fi, + struct fuse_backing *fb); +void fuse_inode_uncached_io_end(struct fuse_inode *fi); int fuse_file_io_open(struct file *file, struct inode *inode); void fuse_file_io_release(struct fuse_file *ff, struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3a5d888783353..99e44ea7d8756 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -175,6 +175,7 @@ static void fuse_evict_inode(struct inode *inode) } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { + WARN_ON(fi->iocachectr != 0); WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c index c653ddcf05787..c99e285f3183e 100644 --- a/fs/fuse/iomode.c +++ b/fs/fuse/iomode.c @@ -21,12 +21,13 @@ static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi) } /* - * Start cached io mode. + * Called on cached file open() and on first mmap() of direct_io file. + * Takes cached_io inode mode reference to be dropped on file release. * * Blocks new parallel dio writes and waits for the in-progress parallel dio * writes to complete. */ -int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff) +int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -67,10 +68,9 @@ int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff) return 0; } -static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff) +static void fuse_file_cached_io_release(struct fuse_file *ff, + struct fuse_inode *fi) { - struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fi->lock); WARN_ON(fi->iocachectr <= 0); WARN_ON(ff->iomode != IOM_CACHED); @@ -82,16 +82,15 @@ static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff) } /* Start strictly uncached io mode where cache access is not allowed */ -int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb) +int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb) { - struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_backing *oldfb; int err = 0; spin_lock(&fi->lock); /* deny conflicting backing files on same fuse inode */ oldfb = fuse_inode_backing(fi); - if (oldfb && oldfb != fb) { + if (fb && oldfb && oldfb != fb) { err = -EBUSY; goto unlock; } @@ -99,12 +98,10 @@ int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struc err = -ETXTBSY; goto unlock; } - WARN_ON(ff->iomode != IOM_NONE); fi->iocachectr--; - ff->iomode = IOM_UNCACHED; /* fuse inode holds a single refcount of backing file */ - if (!oldfb) { + if (fb && !oldfb) { oldfb = fuse_inode_backing_set(fi, fb); WARN_ON_ONCE(oldfb != NULL); } else { @@ -115,15 +112,29 @@ unlock: return err; } -void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff) +/* Takes uncached_io inode mode reference to be dropped on file release */ +static int fuse_file_uncached_io_open(struct inode *inode, + struct fuse_file *ff, + struct fuse_backing *fb) { struct fuse_inode *fi = get_fuse_inode(inode); + int err; + + err = fuse_inode_uncached_io_start(fi, fb); + if (err) + return err; + + WARN_ON(ff->iomode != IOM_NONE); + ff->iomode = IOM_UNCACHED; + return 0; +} + +void fuse_inode_uncached_io_end(struct fuse_inode *fi) +{ struct fuse_backing *oldfb = NULL; spin_lock(&fi->lock); WARN_ON(fi->iocachectr >= 0); - WARN_ON(ff->iomode != IOM_UNCACHED); - ff->iomode = IOM_NONE; fi->iocachectr++; if (!fi->iocachectr) { wake_up(&fi->direct_io_waitq); @@ -134,6 +145,15 @@ void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff) fuse_backing_put(oldfb); } +/* Drop uncached_io reference from passthrough open */ +static void fuse_file_uncached_io_release(struct fuse_file *ff, + struct fuse_inode *fi) +{ + WARN_ON(ff->iomode != IOM_UNCACHED); + ff->iomode = IOM_NONE; + fuse_inode_uncached_io_end(fi); +} + /* * Open flags that are allowed in combination with FOPEN_PASSTHROUGH. * A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write @@ -163,7 +183,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file) return PTR_ERR(fb); /* First passthrough file open denies caching inode io mode */ - err = fuse_file_uncached_io_start(inode, ff, fb); + err = fuse_file_uncached_io_open(inode, ff, fb); if (!err) return 0; @@ -216,7 +236,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode) if (ff->open_flags & FOPEN_PASSTHROUGH) err = fuse_file_passthrough_open(inode, file); else - err = fuse_file_cached_io_start(inode, ff); + err = fuse_file_cached_io_open(inode, ff); if (err) goto fail; @@ -236,8 +256,10 @@ fail: /* No more pending io and no new io possible to inode via open/mmapped file */ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode) { + struct fuse_inode *fi = get_fuse_inode(inode); + /* - * Last parallel dio close allows caching inode io mode. + * Last passthrough file close allows caching inode io mode. * Last caching file close exits caching inode io mode. */ switch (ff->iomode) { @@ -245,10 +267,10 @@ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode) /* Nothing to do */ break; case IOM_UNCACHED: - fuse_file_uncached_io_end(inode, ff); + fuse_file_uncached_io_release(ff, fi); break; case IOM_CACHED: - fuse_file_cached_io_end(inode, ff); + fuse_file_cached_io_release(ff, fi); break; } } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fac938f563ad0..1955481832e03 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct dentry *dentry, const u32 *bmval, int ignore_crossmnt) { + DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; int starting_len = xdr->buf->len; __be32 *attrlen_p, status; int attrlen_offset; + u32 attrmask[3]; int err; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; @@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, .mnt = exp->ex_path.mnt, .dentry = dentry, }; - union { - u32 attrmask[3]; - unsigned long mask[2]; - } u; unsigned long bit; bool file_modified = false; u64 size = 0; @@ -3521,20 +3519,19 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, /* * Make a local copy of the attribute bitmap that can be modified. */ - memset(&u, 0, sizeof(u)); - u.attrmask[0] = bmval[0]; - u.attrmask[1] = bmval[1]; - u.attrmask[2] = bmval[2]; + attrmask[0] = bmval[0]; + attrmask[1] = bmval[1]; + attrmask[2] = bmval[2]; args.rdattr_err = 0; if (exp->ex_fslocs.migrated) { - status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1], - &u.attrmask[2], &args.rdattr_err); + status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1], + &attrmask[2], &args.rdattr_err); if (status) goto out; } args.size = 0; - if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { + if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), &file_modified, &size); if (status) @@ -3553,16 +3550,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ - u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; - if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; + if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || - (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &args.statfs); if (err) goto out_nfserr; } - if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && + if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); status = nfserr_jukebox; @@ -3577,10 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.fhp = fhp; args.acl = NULL; - if (u.attrmask[0] & FATTR4_WORD0_ACL) { + if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) - u.attrmask[0] &= ~FATTR4_WORD0_ACL; + attrmask[0] &= ~FATTR4_WORD0_ACL; else if (err == -EINVAL) { status = nfserr_attrnotsupp; goto out; @@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL args.context = NULL; - if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || - u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || + attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), &args.context, &args.contextlen); else err = -EOPNOTSUPP; args.contextsupport = (err == 0); - if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { + if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) - u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; else if (err) goto out_nfserr; } @@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ /* attrmask */ - status = nfsd4_encode_bitmap4(xdr, u.attrmask[0], - u.attrmask[1], u.attrmask[2]); + status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1], + attrmask[2]); if (status) goto out; @@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); if (!attrlen_p) goto out_resource; - for_each_set_bit(bit, (const unsigned long *)&u.mask, + bitmap_from_arr32(attr_bitmap, attrmask, + ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + for_each_set_bit(bit, attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) { status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args); if (status != nfs_ok) diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index bc846b904b68d..aee40db7a036f 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = { #define S_SHIFT 12 static unsigned char -nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { +nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 1b594307c9d5a..202ff91281560 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -711,7 +711,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 686b321c5a8bb..f4e55199938d5 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -340,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS { /* * Share config flags. */ -#define KSMBD_SHARE_FLAG_INVALID (0) -#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) -#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) -#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) -#define KSMBD_SHARE_FLAG_READONLY BIT(3) -#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) -#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) -#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) -#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) -#define KSMBD_SHARE_FLAG_PIPE BIT(8) -#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) -#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) -#define KSMBD_SHARE_FLAG_STREAMS BIT(11) -#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) -#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) -#define KSMBD_SHARE_FLAG_UPDATE BIT(14) -#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_INVALID (0) +#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) +#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) +#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) +#define KSMBD_SHARE_FLAG_READONLY BIT(3) +#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) +#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) +#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) +#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) +#define KSMBD_SHARE_FLAG_PIPE BIT(8) +#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) +#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) +#define KSMBD_SHARE_FLAG_STREAMS BIT(11) +#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) +#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16) /* * Tree connect request flags. diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c0788188aa82f..c67fbc8d6683e 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5723bbf372d7c..355824151c2d8 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || @@ -1984,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work) write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: - rsp->Capabilities = 0; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && + test_share_config_flag(share, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) + rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY; + else + rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; @@ -3498,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) { - if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent) + if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent && + test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) fp->is_persistent = true; else fp->is_durable = true; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 22f0f3db3ac92..51b1b0bed616e 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -754,10 +754,15 @@ retry: goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index aa3411354e66d..16bd693d0b3aa 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, gid_t i_gid; int err; + inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); + if (inode->i_ino == 0) + return -EINVAL; + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid); if (err) return err; @@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode, i_uid_write(inode, i_uid); i_gid_write(inode, i_gid); - inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0); inode_set_atime(inode, inode_get_mtime_sec(inode), 0); inode_set_ctime(inode, inode_get_mtime_sec(inode), 0); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 6b7652fb80505..7cd64021d453d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -463,6 +463,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); |