aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2023-03-04 00:12:38 +0800
committerColy Li <colyli@suse.de>2023-03-04 00:12:38 +0800
commit62192d0140511f8489265a1e81ca2543e20c37cd (patch)
tree4431656ed1325f92014814c49b117f6ec21455a2
parent94559fbf1632bd111ccfdd449dcd346d36fe553c (diff)
downloadbcache-patches-62192d0140511f8489265a1e81ca2543e20c37cd.tar.gz
update for-next and for-test
-rw-r--r--for-next/20221207_ye_xingchen_bcache_convert_to_use_sysfs_emit_sysfs_emit_at_apis.mbx74
-rw-r--r--for-next/20230214_linux_bcache_make_kobj_type_structures_constant.mbx64
-rw-r--r--for-next/20230225_andrea_tomassetti_opensource_bcache_remove_dead_references_to_cache_readaheads.mbx47
-rw-r--r--for-next/[PATCH 1_2] bcache_ fixup btree_cache_wait list damage.eml171
-rw-r--r--for-next/v3_20230217_zyytlz_wz_bcache_remove_some_unnecessary_null_point_check_for_the_return_value_of___bch_.mbx101
-rw-r--r--for-next/v3_20230218_zyytlz_wz_bcache_fix___bch_btree_node_alloc_to_make_the_failure_behavior_consistent.mbx50
-rw-r--r--for-test/Re_ [RFC] Live resize of backing device.eml192
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0001-bcache-add-initial-data-structures-for-nvm-pages.patch343
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0002-bcache-initialize-the-nvm-pages-allocator.patch535
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0003-bcache-initialization-of-the-buddy.patch358
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0004-bcache-bch_nvmpg_alloc_pages-of-the-buddy.patch309
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0005-bcache-bch_nvmpg_free_pages-of-the-buddy-allocator.patch252
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0006-bcache-get-recs-list-head-for-allocated-pages-by-specific-uuid.patch67
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0007-bcache-use-bucket-index-to-set-GC_MARK_METADATA-for-journal-buckets-in-bch_btree_gc_finish.patch48
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0008-bcache-add-bch_nvmpg_flush-to-flush-LLC-of-NVDIMM-pages.patch64
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0009-bcache-add-BCH_FEATURE_INCOMPAT_NVDIMM_META-into-incompat-feature-set.patch60
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0010-bcache-initialize-bcache-journal-for-NVDIMM-meta-device.patch257
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0011-bcache-support-storing-bcache-journal-into-NVDIMM-meta-device.patch232
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0012-bcache-read-jset-from-NVDIMM-pages-for-journal-replay.patch177
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0013-bcache-add-sysfs-interface-register_nvdimm_meta-to-register-NVDIMM-meta-device.patch84
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0014-bcache-add-helper-routines-to-convert-bkey-and-nvmpg-offset.patch169
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0015-bcache-add-KEY_NVMPG-bit-in-KEY_FIELD.patch30
-rw-r--r--for-test/nvdimm-support/meta-dev-20230303/0016-bcache-support-storing-bcache-btree-nodes-into-NVDIMM-meta-device.patch560
23 files changed, 4244 insertions, 0 deletions
diff --git a/for-next/20221207_ye_xingchen_bcache_convert_to_use_sysfs_emit_sysfs_emit_at_apis.mbx b/for-next/20221207_ye_xingchen_bcache_convert_to_use_sysfs_emit_sysfs_emit_at_apis.mbx
new file mode 100644
index 0000000..d054edb
--- /dev/null
+++ b/for-next/20221207_ye_xingchen_bcache_convert_to_use_sysfs_emit_sysfs_emit_at_apis.mbx
@@ -0,0 +1,74 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH] bcache: Convert to use sysfs_emit()/sysfs_emit_at() APIs
+From: ye.xingchen@zte.com.cn <ye.xingchen@zte.com.cn>
+Date: Wed, 07 Dec 2022 17:02:35 +0800
+Message-Id: <202212071702359325169@zte.com.cn>
+To: <colyli@suse.de>
+Cc: <kent.overstreet@gmail.com>, <linux-bcache@vger.kernel.org>, <linux-kernel@vger.kernel.org>
+List-Id: <linux-bcache.vger.kernel.org>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+From: ye xingchen <ye.xingchen@zte.com.cn>
+
+Follow the advice of the Documentation/filesystems/sysfs.rst and show()
+should only use sysfs_emit() or sysfs_emit_at() when formatting the
+value to be returned to user space.
+
+Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
+---
+ drivers/md/bcache/sysfs.c | 31 +++++++++++++++----------------
+ 1 file changed, 15 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
+index c6f677059214..0e2c1880f60b 100644
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -1111,26 +1111,25 @@ SHOW(__bch_cache)
+
+ vfree(p);
+
+- ret = scnprintf(buf, PAGE_SIZE,
+- "Unused: %zu%%\n"
+- "Clean: %zu%%\n"
+- "Dirty: %zu%%\n"
+- "Metadata: %zu%%\n"
+- "Average: %llu\n"
+- "Sectors per Q: %zu\n"
+- "Quantiles: [",
+- unused * 100 / (size_t) ca->sb.nbuckets,
+- available * 100 / (size_t) ca->sb.nbuckets,
+- dirty * 100 / (size_t) ca->sb.nbuckets,
+- meta * 100 / (size_t) ca->sb.nbuckets, sum,
+- n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
++ ret = sysfs_emit(buf,
++ "Unused: %zu%%\n"
++ "Clean: %zu%%\n"
++ "Dirty: %zu%%\n"
++ "Metadata: %zu%%\n"
++ "Average: %llu\n"
++ "Sectors per Q: %zu\n"
++ "Quantiles: [",
++ unused * 100 / (size_t) ca->sb.nbuckets,
++ available * 100 / (size_t) ca->sb.nbuckets,
++ dirty * 100 / (size_t) ca->sb.nbuckets,
++ meta * 100 / (size_t) ca->sb.nbuckets, sum,
++ n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
+
+ for (i = 0; i < ARRAY_SIZE(q); i++)
+- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+- "%u ", q[i]);
++ ret += sysfs_emit_at(buf, ret, "%u ", q[i]);
+ ret--;
+
+- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");
++ ret += sysfs_emit_at(buf, ret, "]\n");
+
+ return ret;
+ }
+
+--
+2.25.1
+
+
diff --git a/for-next/20230214_linux_bcache_make_kobj_type_structures_constant.mbx b/for-next/20230214_linux_bcache_make_kobj_type_structures_constant.mbx
new file mode 100644
index 0000000..2d70d11
--- /dev/null
+++ b/for-next/20230214_linux_bcache_make_kobj_type_structures_constant.mbx
@@ -0,0 +1,64 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH] bcache: make kobj_type structures constant
+From: Thomas Weißschuh <linux@weissschuh.net>
+Date: Tue, 14 Feb 2023 03:13:39 +0000
+Message-Id: <20230214-kobj_type-bcache-v1-1-cf00ead7bee7@weissschuh.net>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.")
+the driver core allows the usage of const struct kobj_type.
+
+Take advantage of this to constify the structure definitions to prevent
+modification at runtime.
+
+Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
+---
+ drivers/md/bcache/bcache.h | 10 +++++-----
+ drivers/md/bcache/sysfs.h | 2 +-
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index aebb7ef10e63..a522f4f1f992 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -1004,11 +1004,11 @@ extern struct workqueue_struct *bch_flush_wq;
+ extern struct mutex bch_register_lock;
+ extern struct list_head bch_cache_sets;
+
+-extern struct kobj_type bch_cached_dev_ktype;
+-extern struct kobj_type bch_flash_dev_ktype;
+-extern struct kobj_type bch_cache_set_ktype;
+-extern struct kobj_type bch_cache_set_internal_ktype;
+-extern struct kobj_type bch_cache_ktype;
++extern const struct kobj_type bch_cached_dev_ktype;
++extern const struct kobj_type bch_flash_dev_ktype;
++extern const struct kobj_type bch_cache_set_ktype;
++extern const struct kobj_type bch_cache_set_internal_ktype;
++extern const struct kobj_type bch_cache_ktype;
+
+ void bch_cached_dev_release(struct kobject *kobj);
+ void bch_flash_dev_release(struct kobject *kobj);
+diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h
+index a2ff6447b699..65b8bd975ab1 100644
+--- a/drivers/md/bcache/sysfs.h
++++ b/drivers/md/bcache/sysfs.h
+@@ -3,7 +3,7 @@
+ #define _BCACHE_SYSFS_H_
+
+ #define KTYPE(type) \
+-struct kobj_type type ## _ktype = { \
++const struct kobj_type type ## _ktype = { \
+ .release = type ## _release, \
+ .sysfs_ops = &((const struct sysfs_ops) { \
+ .show = type ## _show, \
+
+---
+base-commit: f6feea56f66d34259c4222fa02e8171c4f2673d1
+change-id: 20230214-kobj_type-bcache-6d2bd129b0fa
+
+Best regards,
+--
+Thomas Weißschuh <linux@weissschuh.net>
+
diff --git a/for-next/20230225_andrea_tomassetti_opensource_bcache_remove_dead_references_to_cache_readaheads.mbx b/for-next/20230225_andrea_tomassetti_opensource_bcache_remove_dead_references_to_cache_readaheads.mbx
new file mode 100644
index 0000000..650d185
--- /dev/null
+++ b/for-next/20230225_andrea_tomassetti_opensource_bcache_remove_dead_references_to_cache_readaheads.mbx
@@ -0,0 +1,47 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH] bcache: Remove dead references to cache_readaheads
+From: Andrea Tomassetti <andrea.tomassetti-opensource@devo.com>
+Date: Sat, 25 Feb 2023 16:33:55 +0100
+Message-Id: <20230225153355.2779474-1-andrea.tomassetti-opensource@devo.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+The cache_readaheads stat counter is not used anymore and should be
+removed.
+
+Signed-off-by: Andrea Tomassetti <andrea.tomassetti-opensource@devo.com>
+---
+ Documentation/admin-guide/bcache.rst | 3 ---
+ drivers/md/bcache/stats.h | 1 -
+ 2 files changed, 4 deletions(-)
+
+diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
+index bb5032a99234..6fdb495ac466 100644
+--- a/Documentation/admin-guide/bcache.rst
++++ b/Documentation/admin-guide/bcache.rst
+@@ -508,9 +508,6 @@ cache_miss_collisions
+ cache miss, but raced with a write and data was already present (usually 0
+ since the synchronization for cache misses was rewritten)
+
+-cache_readaheads
+- Count of times readahead occurred.
+-
+ Sysfs - cache set
+ ~~~~~~~~~~~~~~~~~
+
+diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
+index bd3afc856d53..21b445f8af15 100644
+--- a/drivers/md/bcache/stats.h
++++ b/drivers/md/bcache/stats.h
+@@ -18,7 +18,6 @@ struct cache_stats {
+ unsigned long cache_misses;
+ unsigned long cache_bypass_hits;
+ unsigned long cache_bypass_misses;
+- unsigned long cache_readaheads;
+ unsigned long cache_miss_collisions;
+ unsigned long sectors_bypassed;
+
+--
+2.39.2
+
diff --git a/for-next/[PATCH 1_2] bcache_ fixup btree_cache_wait list damage.eml b/for-next/[PATCH 1_2] bcache_ fixup btree_cache_wait list damage.eml
new file mode 100644
index 0000000..25188fd
--- /dev/null
+++ b/for-next/[PATCH 1_2] bcache_ fixup btree_cache_wait list damage.eml
@@ -0,0 +1,171 @@
+Return-Path: <mingzhe.zou@easystack.cn>
+Delivered-To: colyli
+Received: from dovecot-director2.suse.de ([192.168.254.65])
+ (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits))
+ by imap2.suse-dmz.suse.de with LMTPS
+ id tcrmJrvvRmK1NwAAMHmgww
+ (envelope-from <mingzhe.zou@easystack.cn>)
+ for <colyli>; Fri, 01 Apr 2022 12:27:39 +0000
+Received: from relay2.suse.de ([149.44.160.134])
+ (using TLSv1.2 with cipher ECDHE-ECDSA-AES128-GCM-SHA256 (128/128 bits))
+ by dovecot-director2.suse.de with LMTPS
+ id KGCAJLvvRmJsTQAApTUePA
+ (envelope-from <mingzhe.zou@easystack.cn>)
+ for <colyli@imap.suse.de>; Fri, 01 Apr 2022 12:27:39 +0000
+Received: from relay2.suse.de (localhost [127.0.0.1])
+ by relay2.suse.de (Postfix) with ESMTP id 8C5A4A3B89
+ for <colyli@imap.suse.de>; Fri, 1 Apr 2022 12:27:39 +0000 (UTC)
+X-Virus-Scanned: by amavisd-new at relay2.suse.de
+X-Spam-Flag: NO
+X-Spam-Score: 0.77
+X-Spam-Level:
+X-Spam-Status: No, score=0.77 tagged_above=-9999 required=5
+ tests=[BAYES_50=0.8, RCVD_IN_DNSWL_NONE=-0.0001,
+ RCVD_IN_MSPIKE_H4=-0.01, RCVD_IN_MSPIKE_WL=-0.01,
+ T_SCC_BODY_TEXT_LINE=-0.01] autolearn=no autolearn_force=no
+Received: from relay2.suse.de ([127.0.0.1])
+ by relay2.suse.de (relay2.suse.de [127.0.0.1]) (amavisd-new, port 10026)
+ with ESMTP id tYeLYEP5FXWf for <colyli@imap.suse.de>;
+ Fri, 1 Apr 2022 12:27:33 +0000 (UTC)
+Received: from mx2.suse.de (unknown [149.44.161.68])
+ (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+ (No client certificate requested)
+ by relay2.suse.de (Postfix) with ESMTPS id 9FD91A3B95
+ for <colyli@imap.suse.de>; Fri, 1 Apr 2022 12:27:32 +0000 (UTC)
+Received: from mail-m2835.qiye.163.com (mail-m2835.qiye.163.com [103.74.28.35])
+ (using TLSv1.2 with cipher ECDHE-ECDSA-AES128-GCM-SHA256 (128/128 bits))
+ (No client certificate requested)
+ by mx2.suse.de (Postfix) with ESMTPS id 5B880AD0E
+ for <colyli@suse.de>; Fri, 1 Apr 2022 12:27:30 +0000 (UTC)
+Received: from localhost.localdomain (unknown [218.94.118.90])
+ by mail-m2835.qiye.163.com (Hmail) with ESMTPA id 1FCC38A07D6;
+ Fri, 1 Apr 2022 20:27:28 +0800 (CST)
+From: mingzhe.zou@easystack.cn
+To: colyli@suse.de,
+ linux-bcache@vger.kernel.org
+Cc: zoumingzhe@qq.com,
+ ZouMingzhe <mingzhe.zou@easystack.cn>
+Subject: [PATCH 1/2] bcache: fixup btree_cache_wait list damage
+Date: Fri, 1 Apr 2022 20:27:24 +0800
+Message-Id: <20220401122725.17725-1-mingzhe.zou@easystack.cn>
+X-Mailer: git-send-email 2.17.1
+X-HM-Spam-Status: e1kfGhgUHx5ZQUtXWQgPGg8OCBgUHx5ZQUlOS1dZCBgUCR5ZQVlLVUtZV1
+ kWDxoPAgseWUFZKDYvK1lXWShZQUlCN1dZLVlBSVdZDwkaFQgSH1lBWRpKSUhWHRpMTU4fTBodSk
+ 1LVRkRExYaEhckFA4PWVdZFhoPEhUdFFlBWU9LSFVKSktISkxVS1kG
+X-HM-Sender-Digest: e1kMHhlZQR0aFwgeV1kSHx4VD1lBWUc6ODY6Tjo*ATIoFExDGDYvDhM2
+ KzwwFFFVSlVKTU9DQ0pNS09DT0xDVTMWGhIXVRYSFRwBEx5VARQOOx4aCAIIDxoYEFUYFUVZV1kS
+ C1lBWUlKQ1VCT1VKSkNVQktZV1kIAVlBT0JKSzcG
+X-HM-Tid: 0a7fe518488e841dkuqw1fcc38a07d6
+
+From: ZouMingzhe <mingzhe.zou@easystack.cn>
+
+We get a kernel crash about "list_add corruption. next->prev should be
+prev (ffff9c801bc01210), but was ffff9c77b688237c. (next=ffffae586d8afe68)."
+
+crash> struct list_head 0xffff9c801bc01210
+struct list_head {
+ next = 0xffffae586d8afe68,
+ prev = 0xffffae586d8afe68
+}
+crash> struct list_head 0xffff9c77b688237c
+struct list_head {
+ next = 0x0,
+ prev = 0x0
+}
+crash> struct list_head 0xffffae586d8afe68
+struct list_head struct: invalid kernel virtual address: ffffae586d8afe68 type: "gdb_readmem_callback"
+Cannot access memory at address 0xffffae586d8afe68
+
+[230469.019492] Call Trace:
+[230469.032041] prepare_to_wait+0x8a/0xb0
+[230469.044363] ? bch_btree_keys_free+0x6c/0xc0 [escache]
+[230469.056533] mca_cannibalize_lock+0x72/0x90 [escache]
+[230469.068788] mca_alloc+0x2ae/0x450 [escache]
+[230469.080790] bch_btree_node_get+0x136/0x2d0 [escache]
+[230469.092681] bch_btree_check_thread+0x1e1/0x260 [escache]
+[230469.104382] ? finish_wait+0x80/0x80
+[230469.115884] ? bch_btree_check_recurse+0x1a0/0x1a0 [escache]
+[230469.127259] kthread+0x112/0x130
+[230469.138448] ? kthread_flush_work_fn+0x10/0x10
+[230469.149477] ret_from_fork+0x35/0x40
+
+bch_btree_check_thread() and bch_dirty_init_thread() maybe call
+mca_cannibalize() to cannibalize other cached btree nodes. Only
+one thread can do it at a time, so the op of other threads will
+be added to the btree_cache_wait list.
+
+We must call finish_wait() to remove op from btree_cache_wait
+before free it's memory address. Otherwise, the list will be
+damaged. Also should call bch_cannibalize_unlock() to release
+the btree_cache_alloc_lock and wake_up other waiters.
+
+Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
+---
+ drivers/md/bcache/btree.c | 10 +++++++++-
+ drivers/md/bcache/btree.h | 2 ++
+ drivers/md/bcache/writeback.c | 8 ++++++++
+ 3 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index ad9f16689419..f8e6f5c7c736 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -885,7 +885,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
+ * cannibalize_bucket() will take. This means every time we unlock the root of
+ * the btree, we need to release this lock if we have it held.
+ */
+-static void bch_cannibalize_unlock(struct cache_set *c)
++void bch_cannibalize_unlock(struct cache_set *c)
+ {
+ spin_lock(&c->btree_cannibalize_lock);
+ if (c->btree_cache_alloc_lock == current) {
+@@ -1968,6 +1968,14 @@ static int bch_btree_check_thread(void *arg)
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
++ /* The op may be added to cache_set's btree_cache_wait
++ * in mca_cannibalize(), must ensure it is removed from
++ * the list and release btree_cache_alloc_lock before
++ * free op memory.
++ * Otherwise, the btree_cache_wait will be damaged.
++ */
++ bch_cannibalize_unlock(c);
++ finish_wait(&c->btree_cache_wait, &(&op)->wait);
+ if (ret)
+ goto out;
+ }
+diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
+index 50482107134f..435e82574ac3 100644
+--- a/drivers/md/bcache/btree.h
++++ b/drivers/md/bcache/btree.h
+@@ -365,6 +365,8 @@ static inline void force_wake_up_gc(struct cache_set *c)
+ _r; \
+ })
+
++void bch_cannibalize_unlock(struct cache_set *c);
++
+ #define MAP_DONE 0
+ #define MAP_CONTINUE 1
+
+diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
+index 9ee0005874cd..5b828555bca8 100644
+--- a/drivers/md/bcache/writeback.c
++++ b/drivers/md/bcache/writeback.c
+@@ -865,6 +865,14 @@ static int bch_root_node_dirty_init(struct cache_set *c,
+ }
+ } while (ret == -EAGAIN);
+
++ /* The op may be added to cache_set's btree_cache_wait
++ * in mca_cannibalize(), must ensure it is removed from
++ * the list and release btree_cache_alloc_lock before
++ * free op memory.
++ * Otherwise, the btree_cache_wait will be damaged.
++ */
++ bch_cannibalize_unlock(c);
++ finish_wait(&c->btree_cache_wait, &(&op.op)->wait);
+ return ret;
+ }
+
+--
+2.17.1
+
diff --git a/for-next/v3_20230217_zyytlz_wz_bcache_remove_some_unnecessary_null_point_check_for_the_return_value_of___bch_.mbx b/for-next/v3_20230217_zyytlz_wz_bcache_remove_some_unnecessary_null_point_check_for_the_return_value_of___bch_.mbx
new file mode 100644
index 0000000..48f99a9
--- /dev/null
+++ b/for-next/v3_20230217_zyytlz_wz_bcache_remove_some_unnecessary_null_point_check_for_the_return_value_of___bch_.mbx
@@ -0,0 +1,101 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH v3] bcache: Remove some unnecessary NULL point check for
+ the return value of __bch_btree_node_alloc-related pointer
+From: Zheng Wang <zyytlz.wz@163.com>
+Date: Fri, 17 Feb 2023 18:09:01 +0800
+Message-Id: <20230217100901.707245-1-zyytlz.wz@163.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+Due to the previously fix of __bch_btree_node_alloc, the return value will
+never be a NULL pointer. So IS_ERR is enough to handle the failure
+ situation. Fix it by replacing IS_ERR_OR_NULL check to IS_ERR check.
+
+Fixes: cafe56359144 ("bcache: A block layer cache")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
+---
+v3:
+- Add Cc: stable@vger.kernel.org suggested by Eric
+v2:
+- Replace more checks
+---
+ drivers/md/bcache/btree.c | 10 +++++-----
+ drivers/md/bcache/super.c | 4 ++--
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 147c493a989a..7c21e54468bf 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -1138,7 +1138,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
+ {
+ struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
+- if (!IS_ERR_OR_NULL(n)) {
++ if (!IS_ERR(n)) {
+ mutex_lock(&n->write_lock);
+ bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
+ bkey_copy_key(&n->key, &b->key);
+@@ -1340,7 +1340,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
+ memset(new_nodes, 0, sizeof(new_nodes));
+ closure_init_stack(&cl);
+
+- while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
++ while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b))
+ keys += r[nodes++].keys;
+
+ blocks = btree_default_blocks(b->c) * 2 / 3;
+@@ -1352,7 +1352,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
+
+ for (i = 0; i < nodes; i++) {
+ new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
+- if (IS_ERR_OR_NULL(new_nodes[i]))
++ if (IS_ERR(new_nodes[i]))
+ goto out_nocoalesce;
+ }
+
+@@ -1487,7 +1487,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
+ bch_keylist_free(&keylist);
+
+ for (i = 0; i < nodes; i++)
+- if (!IS_ERR_OR_NULL(new_nodes[i])) {
++ if (!IS_ERR(new_nodes[i])) {
+ btree_node_free(new_nodes[i]);
+ rw_unlock(true, new_nodes[i]);
+ }
+@@ -1669,7 +1669,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
+ if (should_rewrite) {
+ n = btree_node_alloc_replacement(b, NULL);
+
+- if (!IS_ERR_OR_NULL(n)) {
++ if (!IS_ERR(n)) {
+ bch_btree_node_write_sync(n);
+
+ bch_btree_set_root(n);
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index ba3909bb6bea..7660962e7b8b 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1724,7 +1724,7 @@ static void cache_set_flush(struct closure *cl)
+ if (!IS_ERR_OR_NULL(c->gc_thread))
+ kthread_stop(c->gc_thread);
+
+- if (!IS_ERR_OR_NULL(c->root))
++ if (!IS_ERR(c->root))
+ list_add(&c->root->list, &c->btree_cache);
+
+ /*
+@@ -2088,7 +2088,7 @@ static int run_cache_set(struct cache_set *c)
+
+ err = "cannot allocate new btree root";
+ c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
+- if (IS_ERR_OR_NULL(c->root))
++ if (IS_ERR(c->root))
+ goto err;
+
+ mutex_lock(&c->root->write_lock);
+--
+2.25.1
+
diff --git a/for-next/v3_20230218_zyytlz_wz_bcache_fix___bch_btree_node_alloc_to_make_the_failure_behavior_consistent.mbx b/for-next/v3_20230218_zyytlz_wz_bcache_fix___bch_btree_node_alloc_to_make_the_failure_behavior_consistent.mbx
new file mode 100644
index 0000000..9aef3d4
--- /dev/null
+++ b/for-next/v3_20230218_zyytlz_wz_bcache_fix___bch_btree_node_alloc_to_make_the_failure_behavior_consistent.mbx
@@ -0,0 +1,50 @@
+From git@z Thu Jan 1 00:00:00 1970
+Subject: [PATCH v3] bcache: Fix __bch_btree_node_alloc to make the failure
+ behavior consistent
+From: Zheng Wang <zyytlz.wz@163.com>
+Date: Sat, 18 Feb 2023 15:23:35 +0800
+Message-Id: <20230218072335.1537099-1-zyytlz.wz@163.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+In some specific situation, the return value of __bch_btree_node_alloc may
+be NULL. This may lead to poential NULL pointer dereference in caller
+ function like a calling chaion :
+ btree_split->bch_btree_node_alloc->__bch_btree_node_alloc.
+
+Fix it by initialize return value in __bch_btree_node_alloc before return.
+
+Fixes: cafe56359144 ("bcache: A block layer cache")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
+---
+v3:
+- Add Cc: stable@vger.kernel.org suggested by Eric
+v2:
+- split patch v1 into two patches to make it clearer suggested by Coly Li
+---
+ drivers/md/bcache/btree.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 147c493a989a..cae25e74b9e0 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -1090,10 +1090,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ struct btree *parent)
+ {
+ BKEY_PADDED(key) k;
+- struct btree *b = ERR_PTR(-EAGAIN);
++ struct btree *b;
+
+ mutex_lock(&c->bucket_lock);
+ retry:
++ /* return ERR_PTR(-EAGAIN) when it fails */
++ b = ERR_PTR(-EAGAIN);
+ if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait))
+ goto err;
+
+--
+2.25.1
+
diff --git a/for-test/Re_ [RFC] Live resize of backing device.eml b/for-test/Re_ [RFC] Live resize of backing device.eml
new file mode 100644
index 0000000..d3f094f
--- /dev/null
+++ b/for-test/Re_ [RFC] Live resize of backing device.eml
@@ -0,0 +1,192 @@
+From: Andrea Tomassetti <andrea.tomassetti-opensource@devo.com>
+Subject: [PATCH v2] bcache: Add support for live resize of backing devices
+
+Signed-off-by: Andrea Tomassetti <andrea.tomassetti-opensource@devo.com>
+---
+Hi Coly,
+this is the second version of the patch. As you correctly pointed out,
+I implemented roll-back functionalities in case of error.
+I'm testing this funcionality using QEMU/KVM vm via libvirt.
+Here the steps:
+ 1. make-bcache --writeback -B /dev/vdb -C /dev/vdc
+ 2. mkfs.xfs /dev/bcache0
+ 3. mount /dev/bcache0 /mnt
+ 3. dd if=/dev/random of=/mnt/random0 bs=1M count=1000
+ 4. md5sum /mnt/random0 | tee /mnt/random0.md5
+ 5. [HOST] virsh blockresize <vm-name> --path <disk-path> --size
+<new-size>
+ 6. xfs_growfs /dev/bcache0
+ 6. Repeat steps 3 and 4 with a different file name (e.g. random1.md5)
+ 7. umount/reboot/remount and check that the md5 hashes are correct with
+ md5sum -c /mnt/random?.md5
+
+ drivers/md/bcache/super.c | 84 ++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 83 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index ba3909bb6bea..1435a3f605f8 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -2443,6 +2443,85 @@ static bool bch_is_open(dev_t dev)
+ return bch_is_open_cache(dev) || bch_is_open_backing(dev);
+ }
+
++static bool bch_update_capacity(dev_t dev)
++{
++ const size_t max_stripes = min_t(size_t, INT_MAX,
++ SIZE_MAX / sizeof(atomic_t));
++
++ uint64_t n, n_old, orig_cached_sectors = 0;
++ void *tmp_realloc;
++
++ int nr_stripes_old;
++ bool res = false;
++
++ struct bcache_device *d;
++ struct cache_set *c, *tc;
++ struct cached_dev *dcp, *t, *dc = NULL;
++
++ uint64_t parent_nr_sectors;
++
++ list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
++ list_for_each_entry_safe(dcp, t, &c->cached_devs, list)
++ if (dcp->bdev->bd_dev == dev) {
++ dc = dcp;
++ goto dc_found;
++ }
++
++dc_found:
++ if (!dc)
++ return false;
++
++ parent_nr_sectors = bdev_nr_sectors(dc->bdev) - dc->sb.data_offset;
++
++ if (parent_nr_sectors == bdev_nr_sectors(dc->disk.disk->part0))
++ return false;
++
++ d = &dc->disk;
++ orig_cached_sectors = d->c->cached_dev_sectors;
++
++ /* Force cached device sectors re-calc */
++ calc_cached_dev_sectors(d->c);
++
++ /* Block writeback thread */
++ down_write(&dc->writeback_lock);
++ nr_stripes_old = d->nr_stripes;
++ n = DIV_ROUND_UP_ULL(parent_nr_sectors, d->stripe_size);
++ if (!n || n > max_stripes) {
++ pr_err("nr_stripes too large or invalid: %llu (start sector beyond
+end of disk?)\n",
++ n);
++ goto restore_dev_sectors;
++ }
++ d->nr_stripes = n;
++
++ n = d->nr_stripes * sizeof(atomic_t);
++ n_old = nr_stripes_old * sizeof(atomic_t);
++ tmp_realloc = kvrealloc(d->stripe_sectors_dirty, n_old,
++ n, GFP_KERNEL);
++ if (!tmp_realloc)
++ goto restore_nr_stripes;
++
++ d->stripe_sectors_dirty = (atomic_t *) tmp_realloc;
++
++ n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
++ n_old = BITS_TO_LONGS(nr_stripes_old) * sizeof(unsigned long);
++ tmp_realloc = kvrealloc(d->full_dirty_stripes, n_old, n, GFP_KERNEL);
++ if (!tmp_realloc)
++ goto restore_nr_stripes;
++
++ d->full_dirty_stripes = (unsigned long *) tmp_realloc;
++
++ if ((res = set_capacity_and_notify(dc->disk.disk, parent_nr_sectors)))
++ goto unblock_and_exit;
++
++restore_nr_stripes:
++ d->nr_stripes = nr_stripes_old;
++restore_dev_sectors:
++ d->c->cached_dev_sectors = orig_cached_sectors;
++unblock_and_exit:
++ up_write(&dc->writeback_lock);
++ return res;
++}
++
+ struct async_reg_args {
+ struct delayed_work reg_work;
+ char *path;
+@@ -2569,7 +2648,10 @@ static ssize_t register_bcache(struct kobject *k,
+struct kobj_attribute *attr,
+ mutex_lock(&bch_register_lock);
+ if (lookup_bdev(strim(path), &dev) == 0 &&
+ bch_is_open(dev))
+- err = "device already registered";
++ if (bch_update_capacity(dev))
++ err = "capacity changed";
++ else
++ err = "device already registered";
+ else
+ err = "device busy";
+ mutex_unlock(&bch_register_lock);
+--
+2.39.0
+
+
+
+On 25/1/23 18:59, Coly Li wrote:
+>
+>
+>> 2023年1月25日 18:07,Andrea Tomassetti <andrea.tomassetti-opensource@devo.com> 写道:
+>>
+>> On Tue, Jan 17, 2023 at 5:18 PM Coly Li <colyli@suse.de> wrote:
+>>>>
+>
+>>>>>
+>>>>>> struct async_reg_args {
+>>>>>> struct delayed_work reg_work;
+>>>>>> char *path;
+>>>>>> @@ -2569,7 +2639,10 @@ static ssize_t register_bcache(struct kobject
+>>>>>> *k, struct kobj_attribute *attr,
+>>>>>> mutex_lock(&bch_register_lock);
+>>>>>> if (lookup_bdev(strim(path), &dev) == 0 &&
+>>>>>> bch_is_open(dev))
+>>>>>> - err = "device already registered";
+>>>>>> + if (bch_update_capacity(dev))
+>>>>>> + err = "capacity changed";
+>>>>>> + else
+>>>>>> + err = "device already registered";
+>>>>>
+>>>>>
+>>>>> As I said, it should be a separated write-only sysfile under the cache
+>>>>> device's directory.
+>>>> Can I ask why you don't like the automatic resize way? Why should the
+>>>> resize be manual?
+>>>
+>>> Most of system administrators don’t like such silently automatic things. They want to extend the size explicitly, especially when there is other dependences in their configurations.
+>>>
+>> What I was trying to say is that, in order to resize a block device, a
+>> manual command should be executed. So, this is already a "non-silent"
+>> automatic thing.
+>> Moreover, if the block device has a FS on it, the FS needs to be
+>> manually grown with some special utilities, e.g. xfs_growfs. So,
+>> again, another non-silent automatic step. Don't you agree?
+>> For example, to resize a qcow device attached to a VM I'm manually
+>> doing a `virsh blockresize`. As soon as I issue that command, the
+>> virtio_blk driver inside the VM detects the disk size change and calls
+>> the `set_capacity_and_notify` function. Why then should bcache behave
+>> differently?
+>
+> The above VM example makes sense, I am almost convinced.
+>
+>>
+>> If you're concerned that this can somehow break the
+>> behaviour-compatibility with older versions of the driver, can we
+>> protect this automatic discovery with an optional parameter? Will this
+>> be an option you will take into account?
+>
+> Then let’s forget the option sysfs at this moment. Once you feel the patch is ready for me to testing, please notice me with detailed steps to redo your testing.
+> At that time during my testing, let’s discuss whether an extra option is necesssary, for now just keep your idea as automatically resize the cached device.
+>
+> Thanks for your detailed explanation.
+>
+> Coly Li
+>
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0001-bcache-add-initial-data-structures-for-nvm-pages.patch b/for-test/nvdimm-support/meta-dev-20230303/0001-bcache-add-initial-data-structures-for-nvm-pages.patch
new file mode 100644
index 0000000..5758371
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0001-bcache-add-initial-data-structures-for-nvm-pages.patch
@@ -0,0 +1,343 @@
+From e9147021c678184512de1776d163b5a994a209a3 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 26 Jul 2021 00:26:28 +0800
+Subject: [PATCH 01/16] bcache: add initial data structures for nvm pages
+
+This patch initializes the prototype data structures for nvm pages
+allocator,
+
+- struct bch_nvmpg_sb
+ This is the super block allocated on each nvdimm namespace for the nvm
+pages allocator. A nvdimm pages allocator set may have multiple name-
+spaces, bch_nvmpg_sb->set_uuid is used to mark which nvdimm set this
+namespace belongs to.
+
+- struct bch_nvmpg_header
+ This is a table for all heads of all allocation record lists. An allo-
+cation record list traces all page(s) allocated from nvdimm namespace(s)
+to a specific requester (identified by uuid). After system reboot, a
+requester can retrieve all previously allocated nvdimm pages from its
+record list by a pre-defined uuid.
+
+- struct bch_nvmpg_head
+ This is a head of an allocation record list. Each nvdimm pages
+requester (typically it's a driver) has and only has one allocation
+record list, and an allocated nvdimm page only belongs to a specific
+allocation record list. Member uuid[] will be set as the requester's
+uuid, e.g. for bcache it is the cache set uuid. Member label is not
+mandatory, it is a human-readable string for debug purpose. The nvm
+offset format pointers recs_offset[] point to the location of actual
+allocator record lists on each namespace of the nvdimm pages allocator
+set. Each per namespace record list is represented by the following
+struct bch_nvmpg_recs.
+
+- struct bch_nvmpg_recs
+ This structure represents a requester's allocation record list. Member
+uuid is same value as the uuid of its corresponding struct
+bch_nvmpg_head. Member recs[] is a table of struct bch_pgalloc_rec
+objects to trace all allocated nvmdimm pages. If the table recs[] is
+full, the nvmpg format offset is a pointer points to the next struct
+bch_nvmpg_recs object, nvm pages allocator will look for available free
+allocation record there. All the linked struct bch_nvmpg_recs objects
+compose a requester's allocation record list which is headed by the above
+struct bch_nvmpg_head.
+
+- struct bch_nvmpg_rec
+ This structure records a range of allocated nvdimm pages. Member pgoff
+is offset in unit of page size of this allocation range. Member order
+indicates size of the allocation range by (1 << order) in unit of page
+size. Because the nvdimm pages allocator set may have multiple nvdimm
+namespaces, member ns_id is used to identify which namespace the pgoff
+belongs to.
+ - Bits 0 - 51: pgoff - is pages offset of the allocated pages.
+ - Bits 52 - 57: order - allocated size in page_size * order-of-2
+ - Bits 58 - 60: ns_id - identify which namespace the pages stays on
+ - Bits 61 - 63: reserved.
+Since each of the allocated nvm pages are power of 2, using 6 bits to
+represent allocated size can have (1<<(1<<64) - 1) * PAGE_SIZE maximum
+value. It can be a 76 bits width range size in byte for 4KB page size,
+which is large enough currently.
+
+All the structure members having _offset suffix are in a special format.
+E.g. bch_nvmpg_sb.{sb_offset, pages_offset, set_header_offset},
+bch_nvmpg_head.recs_offset, bch_nvmpg_recs.{head_offset, next_offset},
+the offset value is 64bit, the most significant 3 bits are used to
+identify which namespace this offset belongs to, and the remaining 61 bits
+are actual offset inside the namespace. Following patches will have
+helper routines to do the conversion between memory pointer and offset.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Ying Huang <ying.huang@intel.com>
+---
+ drivers/md/bcache/nvmpg_format.h | 253 +++++++++++++++++++++++++++++++
+ 1 file changed, 253 insertions(+)
+ create mode 100644 drivers/md/bcache/nvmpg_format.h
+
+diff --git a/drivers/md/bcache/nvmpg_format.h b/drivers/md/bcache/nvmpg_format.h
+new file mode 100644
+index 000000000000..e9eb6371fd78
+--- /dev/null
++++ b/drivers/md/bcache/nvmpg_format.h
+@@ -0,0 +1,253 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++
++#ifndef _NVMPG_FORMAT_H
++#define _NVMPG_FORMAT_H
++
++/*
++ * Bcache on NVDIMM data structures
++ */
++
++/*
++ * - struct bch_nvmpg_sb
++ * This is the super block allocated on each nvdimm namespace for the nvm
++ * pages allocator. A nvdimm pages allocator set may have multiple namespaces,
++ * bch_nvmpg_sb->set_uuid is used to mark which nvdimm set this name space
++ * belongs to.
++ *
++ * - struct bch_nvmpg_header
++ * This is a table for all heads of all allocation record lists. An allo-
++ * cation record list traces all page(s) allocated from nvdimm namespace(s) to
++ * a specific requester (identified by uuid). After system reboot, a requester
++ * can retrieve all previously allocated nvdimm pages from its record list by a
++ * pre-defined uuid.
++ *
++ * - struct bch_nvmpg_head
++ * This is a head of an allocation record list. Each nvdimm pages requester
++ * (typically it's a driver) has and only has one allocation record list, and
++ * an allocated nvdimm page only belongs to a specific allocation record list.
++ * Member uuid[] will be set as the requester's uuid, e.g. for bcache it is the
++ * cache set uuid. Member label is not mandatory, it is a human-readable string
++ * for debug purpose. The nvm offset format pointers recs_offset[] point to the
++ * location of actual allocator record lists on each name space of the nvdimm
++ * pages allocator set. Each per name space record list is represented by the
++ * following struct bch_nvmpg_recs.
++ *
++ * - struct bch_nvmpg_recs
++ * This structure represents a requester's allocation record list. Member uuid
++ * is same value as the uuid of its corresponding struct bch_nvmpg_head. Member
++ * recs[] is a table of struct bch_pgalloc_rec objects to trace all allocated
++ * nvmdimm pages. If the table recs[] is full, the nvmpg format offset is a
++ * pointer points to the next struct bch_nvmpg_recs object, nvm pages allocator
++ * will look for available free allocation record there. All the linked
++ * struct bch_nvmpg_recs objects compose a requester's allocation record list
++ * which is headed by the above struct bch_nvmpg_head.
++ *
++ * - struct bch_nvmpg_rec
++ * This structure records a range of allocated nvdimm pages. Member pgoff is
++ * offset in unit of page size of this allocation range. Member order indicates
++ * size of the allocation range by (1 << order) in unit of page size. Because
++ * the nvdimm pages allocator set may have multiple nvdimm name spaces, member
++ * ns_id is used to identify which name space the pgoff belongs to.
++ *
++ * All allocation record lists are stored on the first initialized nvdimm name-
++ * space (ns_id 0). The meta data default layout of nvm pages allocator on
++ * namespace 0 is,
++ *
++ * 0 +---------------------------------+
++ * | |
++ * 4KB +---------------------------------+ <-- BCH_NVMPG_SB_OFFSET
++ * | bch_nvmpg_sb |
++ * 8KB +---------------------------------+ <-- BCH_NVMPG_RECLIST_HEAD_OFFSET
++ * | bch_nvmpg_header |
++ * | |
++ * 16KB +---------------------------------+ <-- BCH_NVMPG_SYSRECS_OFFSET
++ * | bch_nvmpg_recs |
++ * | (nvm pages internal usage) |
++ * 24KB +---------------------------------+
++ * | |
++ * | |
++ * 16MB +---------------------------------+ <-- BCH_NVMPG_START
++ * | allocable nvm pages |
++ * | for buddy allocator |
++ * end +---------------------------------+
++ *
++ *
++ *
++ * Meta data default layout on the remaining nvdimm namespaces,
++ *
++ * 0 +---------------------------------+
++ * | |
++ * 4KB +---------------------------------+ <-- BCH_NVMPG_SB_OFFSET
++ * | bch_nvmpg_sb |
++ * 8KB +---------------------------------+
++ * | |
++ * | |
++ * | |
++ * | |
++ * | |
++ * | |
++ * 16MB +---------------------------------+ <-- BCH_NVMPG_START
++ * | allocable nvm pages |
++ * | for buddy allocator |
++ * end +---------------------------------+
++ *
++ *
++ * - The nvmpg offset format pointer
++ * All member names ending with _offset in this header are nvmpg offset
++ * format pointer. The offset format is,
++ * [highest 3 bits: ns_id]
++ * [remaining 61 bits: offset in No. ns_id namespace]
++ *
++ * The above offset is byte unit, the procedure to reference a nvmpg offset
++ * format pointer is,
++ * 1) Identify the namespace related in-memory structure by ns_id from the
++ * highest 3 bits of offset value.
++ * 2) Get the DAX mapping base address from the in-memory structure.
++ * 3) Calculate the actual memory address on nvdimm by adding the DAX base
++ * address with offset value in the remaining low 61 bits.
++ * All related in-memory structure and conversion routines don't belong to
++ * user space api, they are defined by nvm-pages allocator code in
++ * drivers/md/bcache/nvm-pages.{c,h}
++ *
++ */
++
++#include <linux/types.h>
++
++/* In sectors */
++#define BCH_NVMPG_SB_OFFSET 4096
++#define BCH_NVMPG_START (16 << 20)
++
++#define BCH_NVMPG_LBL_SIZE 32
++#define BCH_NVMPG_NS_MAX 8
++
++#define BCH_NVMPG_RECLIST_HEAD_OFFSET (8<<10)
++#define BCH_NVMPG_SYSRECS_OFFSET (16<<10)
++
++#define BCH_NVMPG_SB_VERSION 0
++#define BCH_NVMPG_SB_VERSION_MAX 0
++
++static const __u8 bch_nvmpg_magic[] = {
++ 0x17, 0xbd, 0x53, 0x7f, 0x1b, 0x23, 0xd6, 0x83,
++ 0x46, 0xa4, 0xf8, 0x28, 0x17, 0xda, 0xec, 0xa9 };
++static const __u8 bch_nvmpg_recs_magic[] = {
++ 0x39, 0x25, 0x3f, 0xf7, 0x27, 0x17, 0xd0, 0xb9,
++ 0x10, 0xe6, 0xd2, 0xda, 0x38, 0x68, 0x26, 0xae };
++
++/* takes 64bit width */
++struct bch_nvmpg_rec {
++ union {
++ struct {
++ __u64 pgoff:52;
++ __u64 order:6;
++ __u64 ns_id:3;
++ __u64 reserved:3;
++ };
++ __u64 _v;
++ };
++};
++
++struct bch_nvmpg_recs {
++ union {
++ struct {
++ /*
++ * A nvmpg offset format pointer to
++ * struct bch_nvmpg_head
++ */
++ __u64 head_offset;
++ /*
++ * A nvmpg offset format pointer to
++ * struct bch_nvm_pgalloc_recs which contains
++ * the next recs[] array.
++ */
++ __u64 next_offset;
++ __u8 magic[16];
++ __u8 uuid[16];
++ __u32 size;
++ __u32 used;
++ __u64 _pad[4];
++ struct bch_nvmpg_rec recs[];
++ };
++ __u8 pad[8192];
++ };
++};
++
++#define BCH_NVMPG_MAX_RECS \
++ ((sizeof(struct bch_nvmpg_recs) - \
++ offsetof(struct bch_nvmpg_recs, recs)) / \
++ sizeof(struct bch_nvmpg_rec))
++
++#define BCH_NVMPG_HD_STAT_FREE 0x0
++#define BCH_NVMPG_HD_STAT_ALLOC 0x1
++struct bch_nvmpg_head {
++ __u8 uuid[16];
++ __u8 label[BCH_NVMPG_LBL_SIZE];
++ __u32 state;
++ __u32 flags;
++ /*
++ * Array of offset values from the nvmpg offset format
++ * pointers, each of the pointer points to a per-namespace
++ * struct bch_nvmpg_recs.
++ */
++ __u64 recs_offset[BCH_NVMPG_NS_MAX];
++};
++
++/* heads[0] is always for nvm_pages internal usage */
++struct bch_nvmpg_set_header {
++ union {
++ struct {
++ __u32 size;
++ __u32 used;
++ __u64 _pad[4];
++ struct bch_nvmpg_head heads[];
++ };
++ __u8 pad[8192];
++ };
++};
++
++#define BCH_NVMPG_MAX_HEADS \
++ ((sizeof(struct bch_nvmpg_set_header) - \
++ offsetof(struct bch_nvmpg_set_header, heads)) / \
++ sizeof(struct bch_nvmpg_head))
++
++/* The on-media bit order is local CPU order */
++struct bch_nvmpg_sb {
++ __u64 csum;
++ __u64 sb_offset;
++ __u64 ns_start;
++ __u64 version;
++ __u8 magic[16];
++ __u8 uuid[16];
++ __u32 page_size;
++ __u32 total_ns;
++ __u32 this_ns;
++ union {
++ __u8 set_uuid[16];
++ __u64 set_magic;
++ };
++
++ __u64 flags;
++ __u64 seq;
++
++ __u64 feature_compat;
++ __u64 feature_incompat;
++ __u64 feature_ro_compat;
++
++ /* For allocable nvm pages from buddy systems */
++ __u64 pages_offset;
++ __u64 pages_total;
++
++ __u64 pad[8];
++
++ /*
++ * A nvmpg offset format pointer, it points
++ * to struct bch_nvmpg_set_header which is
++ * stored only on the first name space.
++ */
++ __u64 set_header_offset;
++
++ /* Just for csum_set() */
++ __u32 keys;
++ __u64 d[0];
++};
++
++#endif /* _NVMPG_FORMAT_H */
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0002-bcache-initialize-the-nvm-pages-allocator.patch b/for-test/nvdimm-support/meta-dev-20230303/0002-bcache-initialize-the-nvm-pages-allocator.patch
new file mode 100644
index 0000000..8a2f463
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0002-bcache-initialize-the-nvm-pages-allocator.patch
@@ -0,0 +1,535 @@
+From 08ce6a36470047a30ac9db26714a566280adddde Mon Sep 17 00:00:00 2001
+From: Jianpeng Ma <jianpeng.ma@intel.com>
+Date: Mon, 26 Jul 2021 10:33:30 +0800
+Subject: [PATCH 02/16] bcache: initialize the nvm pages allocator
+
+This patch define the prototype data structures in memory and
+initializes the nvm pages allocator.
+
+The nvm address space which is managed by this allocator can consist of
+many nvm namespaces, and some namespaces can compose into one nvm set,
+like cache set. For this initial implementation, only one set can be
+supported.
+
+The users of this nvm pages allocator need to call register_namespace()
+to register the nvdimm device (like /dev/pmemX) into this allocator as
+the instance of struct nvm_namespace.
+
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
+Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+---
+ drivers/md/bcache/Kconfig | 10 ++
+ drivers/md/bcache/Makefile | 1 +
+ drivers/md/bcache/nvmpg.c | 333 +++++++++++++++++++++++++++++++++++++
+ drivers/md/bcache/nvmpg.h | 97 +++++++++++
+ drivers/md/bcache/super.c | 3 +
+ 5 files changed, 444 insertions(+)
+ create mode 100644 drivers/md/bcache/nvmpg.c
+ create mode 100644 drivers/md/bcache/nvmpg.h
+
+diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
+index cf3e8096942a..4a7c13e882bb 100644
+--- a/drivers/md/bcache/Kconfig
++++ b/drivers/md/bcache/Kconfig
+@@ -36,3 +36,13 @@ config BCACHE_ASYNC_REGISTRATION
+ device path into this file will returns immediately and the real
+ registration work is handled in kernel work queue in asynchronous
+ way.
++
++config BCACHE_NVM_PAGES
++ bool "NVDIMM support for bcache (EXPERIMENTAL)"
++ depends on BCACHE
++ depends on 64BIT
++ depends on LIBNVDIMM
++ depends on DAX
++ help
++ Allocate/release NV-memory pages for bcache and provide allocated pages
++ for each requestor after system reboot.
+diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
+index 5b87e59676b8..276b33be5ad5 100644
+--- a/drivers/md/bcache/Makefile
++++ b/drivers/md/bcache/Makefile
+@@ -5,3 +5,4 @@ obj-$(CONFIG_BCACHE) += bcache.o
+ bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
+ io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+ util.o writeback.o features.o
++bcache-$(CONFIG_BCACHE_NVM_PAGES) += nvmpg.o
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+new file mode 100644
+index 000000000000..8f4e7fc1ad14
+--- /dev/null
++++ b/drivers/md/bcache/nvmpg.c
+@@ -0,0 +1,333 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Nvdimm page-buddy allocator
++ *
++ * Copyright (c) 2021, Intel Corporation.
++ * Copyright (c) 2021, Qiaowei Ren <qiaowei.ren@intel.com>.
++ * Copyright (c) 2021, Jianpeng Ma <jianpeng.ma@intel.com>.
++ */
++
++#include "bcache.h"
++#include "nvmpg.h"
++
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/mutex.h>
++#include <linux/dax.h>
++#include <linux/pfn_t.h>
++#include <linux/libnvdimm.h>
++#include <linux/mm_types.h>
++#include <linux/err.h>
++#include <linux/pagemap.h>
++#include <linux/bitmap.h>
++#include <linux/blkdev.h>
++
++struct bch_nvmpg_set *global_nvmpg_set;
++
++void *bch_nvmpg_offset_to_ptr(unsigned long offset)
++{
++ int ns_id;
++ struct bch_nvmpg_ns *ns;
++
++ if (offset == 0)
++ return NULL;
++
++ ns_id = BCH_NVMPG_GET_NS_ID(offset);
++ ns = global_nvmpg_set->ns_tbl[ns_id];
++
++ if (ns)
++ return (void *)(ns->base_addr + BCH_NVMPG_GET_OFFSET(offset));
++
++ pr_err("Invalid ns_id %u\n", ns_id);
++ return NULL;
++}
++
++unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr)
++{
++ int ns_id = ns->ns_id;
++ unsigned long offset = (unsigned long)(ptr - ns->base_addr);
++
++ return BCH_NVMPG_OFFSET(ns_id, offset);
++}
++
++static void release_ns_tbl(struct bch_nvmpg_set *set)
++{
++ int i;
++ struct bch_nvmpg_ns *ns;
++
++ for (i = 0; i < BCH_NVMPG_NS_MAX; i++) {
++ ns = set->ns_tbl[i];
++ if (ns) {
++ fs_put_dax(ns->dax_dev);
++ blkdev_put(ns->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXEC);
++ set->ns_tbl[i] = NULL;
++ set->attached_ns--;
++ kfree(ns);
++ }
++ }
++
++ if (set->attached_ns)
++ pr_err("unexpected attached_ns: %u\n", set->attached_ns);
++}
++
++static void release_nvmpg_set(struct bch_nvmpg_set *set)
++{
++ release_ns_tbl(set);
++ kfree(set);
++}
++
++/* Namespace 0 contains all meta data of the nvmpg allocation set */
++static int init_nvmpg_set_header(struct bch_nvmpg_ns *ns)
++{
++ struct bch_nvmpg_set_header *set_header;
++
++ if (ns->ns_id != 0) {
++ pr_err("unexpected ns_id %u for first nvmpg namespace.\n",
++ ns->ns_id);
++ return -EINVAL;
++ }
++
++ set_header = bch_nvmpg_offset_to_ptr(ns->sb->set_header_offset);
++
++ mutex_lock(&global_nvmpg_set->lock);
++ global_nvmpg_set->set_header = set_header;
++ global_nvmpg_set->heads_size = set_header->size;
++ global_nvmpg_set->heads_used = set_header->used;
++ mutex_unlock(&global_nvmpg_set->lock);
++
++ return 0;
++}
++
++static int attach_nvmpg_set(struct bch_nvmpg_ns *ns)
++{
++ struct bch_nvmpg_sb *sb = ns->sb;
++ int rc = 0;
++
++ mutex_lock(&global_nvmpg_set->lock);
++
++ if (global_nvmpg_set->ns_tbl[sb->this_ns]) {
++ pr_err("ns_id %u already attached.\n", ns->ns_id);
++ rc = -EEXIST;
++ goto unlock;
++ }
++
++ if (ns->ns_id != 0) {
++ pr_err("unexpected ns_id %u for first namespace.\n", ns->ns_id);
++ rc = -EINVAL;
++ goto unlock;
++ }
++
++ if (global_nvmpg_set->attached_ns > 0) {
++ pr_err("multiple namespace attaching not supported yet\n");
++ rc = -EOPNOTSUPP;
++ goto unlock;
++ }
++
++ if ((global_nvmpg_set->attached_ns + 1) > sb->total_ns) {
++ pr_err("namespace counters error: attached %u > total %u\n",
++ global_nvmpg_set->attached_ns,
++ global_nvmpg_set->total_ns);
++ rc = -EINVAL;
++ goto unlock;
++ }
++
++ memcpy(global_nvmpg_set->set_uuid, sb->set_uuid, 16);
++ global_nvmpg_set->ns_tbl[sb->this_ns] = ns;
++ global_nvmpg_set->attached_ns++;
++ global_nvmpg_set->total_ns = sb->total_ns;
++
++unlock:
++ mutex_unlock(&global_nvmpg_set->lock);
++ return rc;
++}
++
++static int read_nvdimm_meta_super(struct block_device *bdev,
++ struct bch_nvmpg_ns *ns)
++{
++ struct page *page;
++ struct bch_nvmpg_sb *sb;
++ uint64_t expected_csum = 0;
++ int r;
++
++ page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
++ BCH_NVMPG_SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
++
++ if (IS_ERR(page))
++ return -EIO;
++
++ sb = (struct bch_nvmpg_sb *)
++ (page_address(page) + offset_in_page(BCH_NVMPG_SB_OFFSET));
++
++ r = -EINVAL;
++ expected_csum = csum_set(sb);
++ if (expected_csum != sb->csum) {
++ pr_info("csum is not match with expected one\n");
++ goto put_page;
++ }
++
++ if (memcmp(sb->magic, bch_nvmpg_magic, sizeof(bch_nvmpg_magic))) {
++ pr_info("invalid bch_nvmpg_magic\n");
++ goto put_page;
++ }
++
++ if (sb->sb_offset !=
++ BCH_NVMPG_OFFSET(sb->this_ns, BCH_NVMPG_SB_OFFSET)) {
++ pr_info("invalid superblock offset 0x%llx\n", sb->sb_offset);
++ goto put_page;
++ }
++
++ r = -EOPNOTSUPP;
++ if (sb->total_ns != 1) {
++ pr_info("multiple name space not supported yet.\n");
++ goto put_page;
++ }
++
++
++ r = 0;
++ /* Necessary for DAX mapping */
++ ns->page_size = sb->page_size;
++ ns->pages_total = sb->pages_total;
++
++put_page:
++ put_page(page);
++ return r;
++}
++
++struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size)
++{
++ struct bch_nvmpg_ns *ns = NULL;
++ struct bch_nvmpg_sb *sb = NULL;
++ char buf[BDEVNAME_SIZE];
++ struct block_device *bdev;
++ pgoff_t pgoff;
++ u64 start_off;
++ int id, err;
++ char *path;
++ long dax_ret = 0;
++
++ path = kstrndup(dev_path, size, GFP_KERNEL);
++ if (!path) {
++ pr_err("kstrndup failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ bdev = blkdev_get_by_path(strim(path),
++ FMODE_READ|FMODE_WRITE|FMODE_EXEC,
++ global_nvmpg_set);
++ if (IS_ERR(bdev)) {
++ pr_err("get %s error: %ld\n", dev_path, PTR_ERR(bdev));
++ kfree(path);
++ return ERR_PTR(PTR_ERR(bdev));
++ }
++
++ err = -ENOMEM;
++ ns = kzalloc(sizeof(struct bch_nvmpg_ns), GFP_KERNEL);
++ if (!ns)
++ goto bdput;
++
++ err = -EIO;
++ if (read_nvdimm_meta_super(bdev, ns)) {
++ pr_err("%s read nvdimm meta super block failed.\n",
++ bdevname(bdev, buf));
++ goto free_ns;
++ }
++
++ err = -EOPNOTSUPP;
++ ns->dax_dev = fs_dax_get_by_bdev(bdev, &start_off);
++ if (!ns->dax_dev) {
++ pr_err("%s don't support DAX\n", bdevname(bdev, buf));
++ goto free_ns;
++ }
++
++ pgoff = start_off >> PAGE_SHIFT;
++
++ err = -EINVAL;
++ id = dax_read_lock();
++ dax_ret = dax_direct_access(ns->dax_dev, pgoff, ns->pages_total,
++ DAX_ACCESS, &ns->base_addr, &ns->start_pfn);
++ if (dax_ret <= 0) {
++ pr_err("dax_direct_access error\n");
++ dax_read_unlock(id);
++ goto free_ns;
++ }
++
++ if (dax_ret < ns->pages_total) {
++ pr_warn("mapped range %ld is less than ns->pages_total %lu\n",
++ dax_ret, ns->pages_total);
++ }
++ dax_read_unlock(id);
++
++ sb = (struct bch_nvmpg_sb *)(ns->base_addr + BCH_NVMPG_SB_OFFSET);
++
++ err = -EINVAL;
++ /* Check magic again to make sure DAX mapping is correct */
++ if (memcmp(sb->magic, bch_nvmpg_magic, sizeof(bch_nvmpg_magic))) {
++ pr_err("invalid bch_nvmpg_magic after DAX mapping\n");
++ goto free_ns;
++ }
++
++ if ((global_nvmpg_set->attached_ns > 0) &&
++ memcmp(sb->set_uuid, global_nvmpg_set->set_uuid, 16)) {
++ pr_err("set uuid does not match with ns_id %u\n", ns->ns_id);
++ goto free_ns;
++ }
++
++ if (sb->set_header_offset !=
++ BCH_NVMPG_OFFSET(sb->this_ns, BCH_NVMPG_RECLIST_HEAD_OFFSET)) {
++ pr_err("Invalid header offset: this_ns %u, ns_id %llu, offset 0x%llx\n",
++ sb->this_ns,
++ BCH_NVMPG_GET_NS_ID(sb->set_header_offset),
++ BCH_NVMPG_GET_OFFSET(sb->set_header_offset));
++ goto free_ns;
++ }
++
++ ns->page_size = sb->page_size;
++ ns->pages_offset = sb->pages_offset;
++ ns->pages_total = sb->pages_total;
++ ns->sb = sb;
++ ns->free = 0;
++ ns->bdev = bdev;
++ ns->set = global_nvmpg_set;
++
++ err = attach_nvmpg_set(ns);
++ if (err < 0)
++ goto free_ns;
++
++ mutex_init(&ns->lock);
++
++ err = init_nvmpg_set_header(ns);
++ if (err < 0)
++ goto free_ns;
++
++ kfree(path);
++ return ns;
++
++free_ns:
++ if (ns->dax_dev)
++ fs_put_dax(ns->dax_dev);
++ kfree(ns);
++bdput:
++ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXEC);
++ kfree(path);
++ return ERR_PTR(err);
++}
++EXPORT_SYMBOL_GPL(bch_register_namespace);
++
++int __init bch_nvmpg_init(void)
++{
++ global_nvmpg_set = kzalloc(sizeof(*global_nvmpg_set), GFP_KERNEL);
++ if (!global_nvmpg_set)
++ return -ENOMEM;
++
++ global_nvmpg_set->total_ns = 0;
++ mutex_init(&global_nvmpg_set->lock);
++
++ pr_info("bcache nvm init\n");
++ return 0;
++}
++
++void bch_nvmpg_exit(void)
++{
++ release_nvmpg_set(global_nvmpg_set);
++ pr_info("bcache nvm exit\n");
++}
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+new file mode 100644
+index 000000000000..45e14df202ca
+--- /dev/null
++++ b/drivers/md/bcache/nvmpg.h
+@@ -0,0 +1,97 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#ifndef _BCACHE_NVM_PAGES_H
++#define _BCACHE_NVM_PAGES_H
++
++#include <linux/libnvdimm.h>
++
++#include "nvmpg_format.h"
++
++/*
++ * Bcache NVDIMM in memory data structures
++ */
++
++/*
++ * The following three structures in memory records which page(s) allocated
++ * to which owner. After reboot from power failure, they will be initialized
++ * based on nvm pages superblock in NVDIMM device.
++ */
++struct bch_nvmpg_ns {
++ struct bch_nvmpg_sb *sb;
++ void *base_addr;
++
++ unsigned char uuid[16];
++ int ns_id;
++ unsigned int page_size;
++ unsigned long free;
++ unsigned long pages_offset;
++ unsigned long pages_total;
++ pfn_t start_pfn;
++
++ struct dax_device *dax_dev;
++ struct block_device *bdev;
++ struct bch_nvmpg_set *set;
++
++ struct mutex lock;
++};
++
++/*
++ * A set of namespaces. Currently only one set can be supported.
++ */
++struct bch_nvmpg_set {
++ unsigned char set_uuid[16];
++
++ int heads_size;
++ int heads_used;
++ struct bch_nvmpg_set_header *set_header;
++
++ struct bch_nvmpg_ns *ns_tbl[BCH_NVMPG_NS_MAX];
++ int total_ns;
++ int attached_ns;
++
++ struct mutex lock;
++};
++
++#define BCH_NVMPG_NS_ID_BITS 3
++#define BCH_NVMPG_OFFSET_BITS 61
++#define BCH_NVMPG_NS_ID_MASK ((1UL<<BCH_NVMPG_NS_ID_BITS) - 1)
++#define BCH_NVMPG_OFFSET_MASK ((1UL<<BCH_NVMPG_OFFSET_BITS) - 1)
++
++#define BCH_NVMPG_GET_NS_ID(offset) \
++ (((offset) >> BCH_NVMPG_OFFSET_BITS) & BCH_NVMPG_NS_ID_MASK)
++
++#define BCH_NVMPG_GET_OFFSET(offset) ((offset) & BCH_NVMPG_OFFSET_MASK)
++
++#define BCH_NVMPG_OFFSET(ns_id, offset) \
++ ((((ns_id) & BCH_NVMPG_NS_ID_MASK) << BCH_NVMPG_OFFSET_BITS) | \
++ ((offset) & BCH_NVMPG_OFFSET_MASK))
++
++/* Indicate which field in bch_nvmpg_sb to be updated */
++#define BCH_NVMPG_TOTAL_NS 0 /* total_ns */
++
++void *bch_nvmpg_offset_to_ptr(unsigned long offset);
++unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
++
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++
++struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size);
++int bch_nvmpg_init(void);
++void bch_nvmpg_exit(void);
++
++#else
++
++static inline struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size)
++{
++ return NULL;
++}
++
++static inline int bch_nvmpg_init(void)
++{
++ return 0;
++}
++
++static inline void bch_nvmpg_exit(void) { }
++
++#endif /* CONFIG_BCACHE_NVM_PAGES */
++
++#endif /* _BCACHE_NVM_PAGES_H */
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 3563d15dbaf2..ffe79871aa69 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -14,6 +14,7 @@
+ #include "request.h"
+ #include "writeback.h"
+ #include "features.h"
++#include "nvmpg.h"
+
+ #include <linux/blkdev.h>
+ #include <linux/pagemap.h>
+@@ -2816,6 +2817,7 @@ static void bcache_exit(void)
+ {
+ bch_debug_exit();
+ bch_request_exit();
++ bch_nvmpg_exit();
+ if (bcache_kobj)
+ kobject_put(bcache_kobj);
+ if (bcache_wq)
+@@ -2914,6 +2916,7 @@ static int __init bcache_init(void)
+
+ bch_debug_init();
+ closure_debug_init();
++ bch_nvmpg_init();
+
+ bcache_is_reboot = false;
+
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0003-bcache-initialization-of-the-buddy.patch b/for-test/nvdimm-support/meta-dev-20230303/0003-bcache-initialization-of-the-buddy.patch
new file mode 100644
index 0000000..1b2ebca
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0003-bcache-initialization-of-the-buddy.patch
@@ -0,0 +1,358 @@
+From 83b67de501eda0f93e0c77f3201db343577f1f2f Mon Sep 17 00:00:00 2001
+From: Jianpeng Ma <jianpeng.ma@intel.com>
+Date: Mon, 4 Jul 2022 14:30:24 +0800
+Subject: [PATCH 03/16] bcache: initialization of the buddy
+
+This nvm pages allocator will implement the simple buddy allocator to
+manage the nvm address space. This patch initializes this buddy allocator
+for a new namespace.
+
+The unit of alloc/free of the buddy allocator is a page. DAX devices have
+their struct page (in DRAM or PMEM).
+
+ struct { /* ZONE_DEVICE pages */
+ /** @pgmap: Points to the hosting device page map. */
+ struct dev_pagemap *pgmap;
+ void *zone_device_data;
+ /*
+ * ZONE_DEVICE private pages are counted as being
+ * mapped so the next 3 words hold the mapping, index,
+ * and private fields from the source anonymous or
+ * page cache page while the page is migrated to device
+ * private memory.
+ * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
+ * use the mapping, index, and private fields when
+ * pmem backed DAX files are mapped.
+ */
+ };
+
+ZONE_DEVICE pages only use pgmap. The other 4 words [16/32 bytes] are unused.
+So the second/third words will be used as a 'struct list_head' which links
+into the buddy free lists. The fourth word (normal struct page::index) stores
+the pgoff, i.e. the page offset in the dax device. And the fifth word (normal
+struct page::private) stores the buddy order. page_type will be used
+to store buddy flags.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
+Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+---
+ drivers/md/bcache/nvmpg.c | 211 +++++++++++++++++++++++++++++++++++++-
+ drivers/md/bcache/nvmpg.h | 12 +++
+ 2 files changed, 220 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index 8f4e7fc1ad14..feba36ab5541 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -50,6 +50,36 @@ unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr)
+ return BCH_NVMPG_OFFSET(ns_id, offset);
+ }
+
++static struct page *bch_nvmpg_va_to_pg(void *addr)
++{
++ return virt_to_page(addr);
++}
++
++static void *bch_nvmpg_pgoff_to_ptr(struct bch_nvmpg_ns *ns, pgoff_t pgoff)
++{
++ return ns->base_addr + (pgoff << PAGE_SHIFT);
++}
++
++static void *bch_nvmpg_rec_to_ptr(struct bch_nvmpg_rec *r)
++{
++ struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[r->ns_id];
++ pgoff_t pgoff = r->pgoff;
++
++ return bch_nvmpg_pgoff_to_ptr(ns, pgoff);
++}
++
++static inline void reserve_nvmpg_pages(struct bch_nvmpg_ns *ns,
++ pgoff_t pgoff, u64 nr)
++{
++ while (nr > 0) {
++ unsigned int num = nr > UINT_MAX ? UINT_MAX : nr;
++
++ bitmap_set(ns->pages_bitmap, pgoff, num);
++ nr -= num;
++ pgoff += num;
++ }
++}
++
+ static void release_ns_tbl(struct bch_nvmpg_set *set)
+ {
+ int i;
+@@ -58,6 +88,10 @@ static void release_ns_tbl(struct bch_nvmpg_set *set)
+ for (i = 0; i < BCH_NVMPG_NS_MAX; i++) {
+ ns = set->ns_tbl[i];
+ if (ns) {
++ kvfree(ns->pages_bitmap);
++ if (ns->recs_bitmap)
++ bitmap_free(ns->recs_bitmap);
++
+ fs_put_dax(ns->dax_dev);
+ blkdev_put(ns->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXEC);
+ set->ns_tbl[i] = NULL;
+@@ -76,10 +110,73 @@ static void release_nvmpg_set(struct bch_nvmpg_set *set)
+ kfree(set);
+ }
+
++static int validate_recs(int ns_id,
++ struct bch_nvmpg_head *head,
++ struct bch_nvmpg_recs *recs)
++{
++ if (memcmp(recs->magic, bch_nvmpg_recs_magic, sizeof(bch_nvmpg_recs_magic))) {
++ pr_err("Invalid bch_nvmpg_recs magic\n");
++ return -EINVAL;
++ }
++
++ if (memcmp(recs->uuid, head->uuid, 16)) {
++ pr_err("Invalid bch_nvmpg_recs uuid\n");
++ return -EINVAL;
++ }
++
++ if (recs->head_offset !=
++ bch_nvmpg_ptr_to_offset(global_nvmpg_set->ns_tbl[ns_id], head)) {
++ pr_err("Invalid recs head_offset\n");
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
++static int reserve_nvmpg_recs(struct bch_nvmpg_recs *recs)
++{
++ int i, used = 0;
++
++ for (i = 0; i < recs->size; i++) {
++ struct bch_nvmpg_rec *r = &recs->recs[i];
++ struct bch_nvmpg_ns *ns;
++ struct page *page;
++ void *addr;
++
++ if (r->pgoff == 0)
++ continue;
++
++ ns = global_nvmpg_set->ns_tbl[r->ns_id];
++ addr = bch_nvmpg_rec_to_ptr(r);
++ if (addr < ns->base_addr) {
++ pr_err("Invalid recorded address\n");
++ return -EINVAL;
++ }
++
++ /* init struct page: index/private */
++ page = bch_nvmpg_va_to_pg(addr);
++ set_page_private(page, r->order);
++ page->index = r->pgoff;
++
++ reserve_nvmpg_pages(ns, r->pgoff, 1L << r->order);
++ used++;
++ }
++
++ if (used != recs->used) {
++ pr_err("used %d doesn't match recs->used %d\n",
++ used, recs->used);
++ return -EINVAL;
++ }
++
++ return 0;
++}
++
+ /* Namespace 0 contains all meta data of the nvmpg allocation set */
+ static int init_nvmpg_set_header(struct bch_nvmpg_ns *ns)
+ {
+ struct bch_nvmpg_set_header *set_header;
++ struct bch_nvmpg_recs *sys_recs;
++ int i, j, used = 0, rc = 0;
+
+ if (ns->ns_id != 0) {
+ pr_err("unexpected ns_id %u for first nvmpg namespace.\n",
+@@ -93,9 +190,82 @@ static int init_nvmpg_set_header(struct bch_nvmpg_ns *ns)
+ global_nvmpg_set->set_header = set_header;
+ global_nvmpg_set->heads_size = set_header->size;
+ global_nvmpg_set->heads_used = set_header->used;
++
++ /* Reserve the used space from buddy allocator */
++ reserve_nvmpg_pages(ns, 0, div_u64(ns->pages_offset, ns->page_size));
++
++ sys_recs = ns->base_addr + BCH_NVMPG_SYSRECS_OFFSET;
++ for (i = 0; i < set_header->size; i++) {
++ struct bch_nvmpg_head *head;
++
++ head = &set_header->heads[i];
++ if (head->state == BCH_NVMPG_HD_STAT_FREE)
++ continue;
++
++ used++;
++ if (used > global_nvmpg_set->heads_size) {
++ pr_err("used heads %d > heads size %d.\n",
++ used, global_nvmpg_set->heads_size);
++ goto unlock;
++ }
++
++ for (j = 0; j < BCH_NVMPG_NS_MAX; j++) {
++ struct bch_nvmpg_recs *recs;
++
++ recs = bch_nvmpg_offset_to_ptr(head->recs_offset[j]);
++
++ /* Iterate the recs list */
++ while (recs) {
++ rc = validate_recs(j, head, recs);
++ if (rc < 0)
++ goto unlock;
++
++ rc = reserve_nvmpg_recs(recs);
++ if (rc < 0)
++ goto unlock;
++
++ bitmap_set(ns->recs_bitmap, recs - sys_recs, 1);
++ recs = bch_nvmpg_offset_to_ptr(recs->next_offset);
++ }
++ }
++ }
++unlock:
+ mutex_unlock(&global_nvmpg_set->lock);
++ return rc;
++}
+
+- return 0;
++static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
++{
++ unsigned int start, end, pages;
++ int i;
++ struct page *page;
++ pgoff_t pgoff_start;
++
++ for_each_clear_bitrange(start, end, ns->pages_bitmap, ns->pages_total) {
++ pgoff_start = start;
++ pages = end - start;
++
++ while (pages) {
++ void *addr;
++
++ for (i = BCH_MAX_ORDER - 1; i >= 0; i--) {
++ if ((pgoff_start % (1L << i) == 0) &&
++ (pages >= (1L << i)))
++ break;
++ }
++
++ addr = bch_nvmpg_pgoff_to_ptr(ns, pgoff_start);
++ page = bch_nvmpg_va_to_pg(addr);
++ set_page_private(page, i);
++ page->index = pgoff_start;
++ __SetPageBuddy(page);
++ list_add((struct list_head *)&page->zone_device_data,
++ &ns->free_area[i]);
++
++ pgoff_start += 1L << i;
++ pages -= 1L << i;
++ }
++ }
+ }
+
+ static int attach_nvmpg_set(struct bch_nvmpg_ns *ns)
+@@ -201,7 +371,7 @@ struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size)
+ struct block_device *bdev;
+ pgoff_t pgoff;
+ u64 start_off;
+- int id, err;
++ int id, i, err;
+ char *path;
+ long dax_ret = 0;
+
+@@ -295,13 +465,48 @@ struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size)
+
+ mutex_init(&ns->lock);
+
++ /*
++ * parameters of bitmap_set/clear are unsigned int.
++ * Given currently size of nvm is far from exceeding this limit,
++ * so only add a WARN_ON message.
++ */
++ WARN_ON(BITS_TO_LONGS(ns->pages_total) > UINT_MAX);
++ ns->pages_bitmap = kvcalloc(BITS_TO_LONGS(ns->pages_total),
++ sizeof(unsigned long), GFP_KERNEL);
++ if (!ns->pages_bitmap) {
++ err = -ENOMEM;
++ goto clear_ns_nr;
++ }
++
++ if (ns->sb->this_ns == 0) {
++ ns->recs_bitmap =
++ bitmap_zalloc(BCH_MAX_PGALLOC_RECS, GFP_KERNEL);
++ if (ns->recs_bitmap == NULL) {
++ err = -ENOMEM;
++ goto free_pages_bitmap;
++ }
++ }
++
++ for (i = 0; i < BCH_MAX_ORDER; i++)
++ INIT_LIST_HEAD(&ns->free_area[i]);
++
+ err = init_nvmpg_set_header(ns);
+ if (err < 0)
+- goto free_ns;
++ goto free_recs_bitmap;
++
++ if (ns->sb->this_ns == 0)
++ /* init buddy allocator */
++ bch_nvmpg_init_free_space(ns);
+
+ kfree(path);
+ return ns;
+
++free_recs_bitmap:
++ bitmap_free(ns->recs_bitmap);
++free_pages_bitmap:
++ kvfree(ns->pages_bitmap);
++clear_ns_nr:
++ global_nvmpg_set->ns_tbl[sb->this_ns] = NULL;
+ free_ns:
+ if (ns->dax_dev)
+ fs_put_dax(ns->dax_dev);
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index 45e14df202ca..1e2108221630 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -11,6 +11,8 @@
+ * Bcache NVDIMM in memory data structures
+ */
+
++#define BCH_MAX_ORDER 20
++
+ /*
+ * The following three structures in memory records which page(s) allocated
+ * to which owner. After reboot from power failure, they will be initialized
+@@ -28,6 +30,11 @@ struct bch_nvmpg_ns {
+ unsigned long pages_total;
+ pfn_t start_pfn;
+
++ unsigned long *pages_bitmap;
++ struct list_head free_area[BCH_MAX_ORDER];
++
++ unsigned long *recs_bitmap;
++
+ struct dax_device *dax_dev;
+ struct block_device *bdev;
+ struct bch_nvmpg_set *set;
+@@ -69,6 +76,11 @@ struct bch_nvmpg_set {
+ /* Indicate which field in bch_nvmpg_sb to be updated */
+ #define BCH_NVMPG_TOTAL_NS 0 /* total_ns */
+
++#define BCH_MAX_PGALLOC_RECS \
++ (min_t(unsigned int, 64, \
++ (BCH_NVMPG_START - BCH_NVMPG_SYSRECS_OFFSET) / \
++ sizeof(struct bch_nvmpg_recs)))
++
+ void *bch_nvmpg_offset_to_ptr(unsigned long offset);
+ unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
+
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0004-bcache-bch_nvmpg_alloc_pages-of-the-buddy.patch b/for-test/nvdimm-support/meta-dev-20230303/0004-bcache-bch_nvmpg_alloc_pages-of-the-buddy.patch
new file mode 100644
index 0000000..519f493
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0004-bcache-bch_nvmpg_alloc_pages-of-the-buddy.patch
@@ -0,0 +1,309 @@
+From 7e5c0ec244687ee77485b002030740142bbb97cb Mon Sep 17 00:00:00 2001
+From: Jianpeng Ma <jianpeng.ma@intel.com>
+Date: Mon, 4 Jul 2022 14:53:04 +0800
+Subject: [PATCH 04/16] bcache: bch_nvmpg_alloc_pages() of the buddy
+
+This patch implements bch_nvmpg_alloc_pages() of the nvm pages buddy
+allocator. In terms of function, this func is like the current
+page-buddy-alloc. But the differences are:
+a: it needs owner_uuid as a parameter which records owner info, and it
+makes that info persistent.
+b: it doesn't need flags like GFP_*. All allocs are equal.
+c: it doesn't trigger other ops such as swap/recycle.
+
+Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
+Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+---
+ drivers/md/bcache/nvmpg.c | 222 ++++++++++++++++++++++++++++++++++++++
+ drivers/md/bcache/nvmpg.h | 9 ++
+ 2 files changed, 231 insertions(+)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index feba36ab5541..875983fee67e 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -42,6 +42,11 @@ void *bch_nvmpg_offset_to_ptr(unsigned long offset)
+ return NULL;
+ }
+
++static unsigned long bch_nvmpg_offset_to_pgoff(unsigned long nvmpg_offset)
++{
++ return BCH_NVMPG_GET_OFFSET(nvmpg_offset) >> PAGE_SHIFT;
++}
++
+ unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr)
+ {
+ int ns_id = ns->ns_id;
+@@ -60,6 +65,15 @@ static void *bch_nvmpg_pgoff_to_ptr(struct bch_nvmpg_ns *ns, pgoff_t pgoff)
+ return ns->base_addr + (pgoff << PAGE_SHIFT);
+ }
+
++static unsigned long bch_nvmpg_pgoff_to_offset(struct bch_nvmpg_ns *ns,
++ pgoff_t pgoff)
++{
++ int ns_id = ns->ns_id;
++ unsigned long offset = pgoff << PAGE_SHIFT;
++
++ return BCH_NVMPG_OFFSET(ns_id, offset);
++}
++
+ static void *bch_nvmpg_rec_to_ptr(struct bch_nvmpg_rec *r)
+ {
+ struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[r->ns_id];
+@@ -268,6 +282,214 @@ static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
+ }
+ }
+
++
++/* If not found, it will create if create == true */
++static struct bch_nvmpg_head *find_nvmpg_head(const char *uuid, bool create)
++{
++ struct bch_nvmpg_set_header *set_header = global_nvmpg_set->set_header;
++ struct bch_nvmpg_head *head = NULL;
++ int i;
++
++ if (set_header == NULL)
++ goto out;
++
++ for (i = 0; i < set_header->size; i++) {
++ struct bch_nvmpg_head *h = &set_header->heads[i];
++
++ if (h->state != BCH_NVMPG_HD_STAT_ALLOC)
++ continue;
++
++ if (!memcmp(uuid, h->uuid, 16)) {
++ head = h;
++ break;
++ }
++ }
++
++ if (!head && create) {
++ u32 used = set_header->used;
++
++ if (set_header->size > used) {
++ head = &set_header->heads[used];
++ memset(head, 0, sizeof(struct bch_nvmpg_head));
++ head->state = BCH_NVMPG_HD_STAT_ALLOC;
++ memcpy(head->uuid, uuid, 16);
++ global_nvmpg_set->heads_used++;
++ set_header->used++;
++ } else
++ pr_info("No free bch_nvmpg_head\n");
++ }
++
++out:
++ return head;
++}
++
++static struct bch_nvmpg_recs *find_empty_nvmpg_recs(void)
++{
++ unsigned int start;
++ struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[0];
++ struct bch_nvmpg_recs *recs;
++
++ start = bitmap_find_next_zero_area(ns->recs_bitmap,
++ BCH_MAX_PGALLOC_RECS, 0, 1, 0);
++ if (start > BCH_MAX_PGALLOC_RECS) {
++ pr_info("No free struct bch_nvmpg_recs\n");
++ return NULL;
++ }
++
++ bitmap_set(ns->recs_bitmap, start, 1);
++ recs = (struct bch_nvmpg_recs *)
++ bch_nvmpg_offset_to_ptr(BCH_NVMPG_SYSRECS_OFFSET)
++ + start;
++
++ memset(recs, 0, sizeof(struct bch_nvmpg_recs));
++ return recs;
++}
++
++
++static struct bch_nvmpg_recs *find_nvmpg_recs(struct bch_nvmpg_ns *ns,
++ struct bch_nvmpg_head *head,
++ bool create)
++{
++ int ns_id = ns->sb->this_ns;
++ struct bch_nvmpg_recs *prev_recs = NULL, *recs = NULL;
++
++ recs = bch_nvmpg_offset_to_ptr(head->recs_offset[ns_id]);
++
++ /* If create=false, we return recs[nr] */
++ if (!create)
++ return recs;
++
++ /*
++ * If create=true, it mean we need a empty struct bch_nvmpg_rec
++ * So we should find non-empty struct bch_nvmpg_recs or alloc
++ * new struct bch_nvmpg_recs. And return this bch_nvmpg_recs
++ */
++ while (recs && (recs->used == recs->size)) {
++ prev_recs = recs;
++ recs = bch_nvmpg_offset_to_ptr(recs->next_offset);
++ }
++
++ /* Found empty struct bch_nvmpg_recs */
++ if (recs)
++ return recs;
++
++ /* Need alloc new struct bch_nvmpg_recs */
++ recs = find_empty_nvmpg_recs();
++ if (recs) {
++ unsigned long offset;
++
++ recs->next_offset = 0;
++ recs->head_offset = bch_nvmpg_ptr_to_offset(ns, head);
++ memcpy(recs->magic, bch_nvmpg_recs_magic, sizeof(bch_nvmpg_recs_magic));
++ memcpy(recs->uuid, head->uuid, 16);
++ recs->size = BCH_NVMPG_MAX_RECS;
++ recs->used = 0;
++
++ offset = bch_nvmpg_ptr_to_offset(ns, recs);
++ if (prev_recs)
++ prev_recs->next_offset = offset;
++ else
++ head->recs_offset[ns_id] = offset;
++ }
++
++ return recs;
++}
++
++static void add_nvmpg_rec(struct bch_nvmpg_ns *ns,
++ struct bch_nvmpg_recs *recs,
++ unsigned long nvmpg_offset,
++ int order)
++{
++ int i, ns_id;
++ unsigned long pgoff;
++
++ pgoff = bch_nvmpg_offset_to_pgoff(nvmpg_offset);
++ ns_id = ns->sb->this_ns;
++
++ for (i = 0; i < recs->size; i++) {
++ if (recs->recs[i].pgoff == 0) {
++ recs->recs[i].pgoff = pgoff;
++ recs->recs[i].order = order;
++ recs->recs[i].ns_id = ns_id;
++ recs->used++;
++ break;
++ }
++ }
++ BUG_ON(i == recs->size);
++}
++
++
++unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
++{
++ unsigned long nvmpg_offset = 0;
++ struct bch_nvmpg_head *head;
++ int n, o;
++
++ mutex_lock(&global_nvmpg_set->lock);
++ head = find_nvmpg_head(uuid, true);
++
++ if (!head) {
++ pr_err("Cannot find bch_nvmpg_recs by uuid.\n");
++ goto unlock;
++ }
++
++ for (n = 0; n < global_nvmpg_set->total_ns; n++) {
++ struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[n];
++
++ if (!ns || (ns->free < (1L << order)))
++ continue;
++
++ for (o = order; o < BCH_MAX_ORDER; o++) {
++ struct list_head *list;
++ struct page *page, *buddy_page;
++
++ if (list_empty(&ns->free_area[o]))
++ continue;
++
++ list = ns->free_area[o].next;
++ page = container_of((void *)list, struct page,
++ zone_device_data);
++
++ list_del(list);
++
++ while (o != order) {
++ void *addr;
++ pgoff_t pgoff;
++
++ pgoff = page->index + (1L << (o - 1));
++ addr = bch_nvmpg_pgoff_to_ptr(ns, pgoff);
++ buddy_page = bch_nvmpg_va_to_pg(addr);
++ set_page_private(buddy_page, o - 1);
++ buddy_page->index = pgoff;
++ __SetPageBuddy(buddy_page);
++ list_add((struct list_head *)&buddy_page->zone_device_data,
++ &ns->free_area[o - 1]);
++ o--;
++ }
++
++ set_page_private(page, order);
++ __ClearPageBuddy(page);
++ ns->free -= 1L << order;
++ nvmpg_offset = bch_nvmpg_pgoff_to_offset(ns, page->index);
++ break;
++ }
++
++ if (o < BCH_MAX_ORDER) {
++ struct bch_nvmpg_recs *recs;
++
++ recs = find_nvmpg_recs(ns, head, true);
++ /* ToDo: handle pgalloc_recs==NULL */
++ add_nvmpg_rec(ns, recs, nvmpg_offset, order);
++ break;
++ }
++ }
++
++unlock:
++ mutex_unlock(&global_nvmpg_set->lock);
++ return nvmpg_offset;
++}
++EXPORT_SYMBOL_GPL(bch_nvmpg_alloc_pages);
++
+ static int attach_nvmpg_set(struct bch_nvmpg_ns *ns)
+ {
+ struct bch_nvmpg_sb *sb = ns->sb;
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index 1e2108221630..e52bb1f3f79e 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -76,6 +76,9 @@ struct bch_nvmpg_set {
+ /* Indicate which field in bch_nvmpg_sb to be updated */
+ #define BCH_NVMPG_TOTAL_NS 0 /* total_ns */
+
++#define BCH_PGOFF_TO_KVADDR(pgoff) \
++ ((void *)((unsigned long)(pgoff) << PAGE_SHIFT))
++
+ #define BCH_MAX_PGALLOC_RECS \
+ (min_t(unsigned int, 64, \
+ (BCH_NVMPG_START - BCH_NVMPG_SYSRECS_OFFSET) / \
+@@ -89,6 +92,7 @@ unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
+ struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size);
+ int bch_nvmpg_init(void);
+ void bch_nvmpg_exit(void);
++unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
+
+ #else
+
+@@ -104,6 +108,11 @@ static inline int bch_nvmpg_init(void)
+
+ static inline void bch_nvmpg_exit(void) { }
+
++static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
++{
++ return 0;
++}
++
+ #endif /* CONFIG_BCACHE_NVM_PAGES */
+
+ #endif /* _BCACHE_NVM_PAGES_H */
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0005-bcache-bch_nvmpg_free_pages-of-the-buddy-allocator.patch b/for-test/nvdimm-support/meta-dev-20230303/0005-bcache-bch_nvmpg_free_pages-of-the-buddy-allocator.patch
new file mode 100644
index 0000000..b760b14
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0005-bcache-bch_nvmpg_free_pages-of-the-buddy-allocator.patch
@@ -0,0 +1,252 @@
+From 3eeb48d89ffb7cf3670f44e0723a6ed73b14efec Mon Sep 17 00:00:00 2001
+From: Jianpeng Ma <jianpeng.ma@intel.com>
+Date: Thu, 21 Oct 2021 19:06:35 +0800
+Subject: [PATCH 05/16] bcache: bch_nvmpg_free_pages() of the buddy allocator
+
+This patch implements the bch_nvmpg_free_pages() of the buddy allocator.
+
+The difference between this and page-buddy-free:
+it needs owner_uuid to free owner-allocated pages, and the change must
+be persistent after free.
+
+Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
+Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+---
+ drivers/md/bcache/nvmpg.c | 165 ++++++++++++++++++++++++++++++++++++--
+ drivers/md/bcache/nvmpg.h | 3 +
+ 2 files changed, 161 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index 875983fee67e..129938603bc7 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -248,6 +248,57 @@ static int init_nvmpg_set_header(struct bch_nvmpg_ns *ns)
+ return rc;
+ }
+
++static void __free_space(struct bch_nvmpg_ns *ns, unsigned long nvmpg_offset,
++ int order)
++{
++ unsigned long add_pages = (1L << order);
++ pgoff_t pgoff;
++ struct page *page;
++ void *va;
++
++ if (nvmpg_offset == 0) {
++ pr_err("free pages on offset 0\n");
++ return;
++ }
++
++ page = bch_nvmpg_va_to_pg(bch_nvmpg_offset_to_ptr(nvmpg_offset));
++ WARN_ON((!page) || (page->private != order));
++ pgoff = page->index;
++
++ while (order < BCH_MAX_ORDER - 1) {
++ struct page *buddy_page;
++
++ pgoff_t buddy_pgoff = pgoff ^ (1L << order);
++ pgoff_t parent_pgoff = pgoff & ~(1L << order);
++
++ if ((parent_pgoff + (1L << (order + 1)) > ns->pages_total))
++ break;
++
++ va = bch_nvmpg_pgoff_to_ptr(ns, buddy_pgoff);
++ buddy_page = bch_nvmpg_va_to_pg(va);
++ WARN_ON(!buddy_page);
++
++ if (PageBuddy(buddy_page) && (buddy_page->private == order)) {
++ list_del((struct list_head *)&buddy_page->zone_device_data);
++ __ClearPageBuddy(buddy_page);
++ pgoff = parent_pgoff;
++ order++;
++ continue;
++ }
++ break;
++ }
++
++ va = bch_nvmpg_pgoff_to_ptr(ns, pgoff);
++ page = bch_nvmpg_va_to_pg(va);
++ WARN_ON(!page);
++ list_add((struct list_head *)&page->zone_device_data,
++ &ns->free_area[order]);
++ page->index = pgoff;
++ set_page_private(page, order);
++ __SetPageBuddy(page);
++ ns->free += add_pages;
++}
++
+ static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
+ {
+ unsigned int start, end, pages;
+@@ -260,21 +311,19 @@ static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
+ pages = end - start;
+
+ while (pages) {
+- void *addr;
+-
+ for (i = BCH_MAX_ORDER - 1; i >= 0; i--) {
+ if ((pgoff_start % (1L << i) == 0) &&
+ (pages >= (1L << i)))
+ break;
+ }
+
+- addr = bch_nvmpg_pgoff_to_ptr(ns, pgoff_start);
+- page = bch_nvmpg_va_to_pg(addr);
++ page = bch_nvmpg_va_to_pg(
++ bch_nvmpg_pgoff_to_ptr(ns, pgoff_start));
+ set_page_private(page, i);
+ page->index = pgoff_start;
+- __SetPageBuddy(page);
+- list_add((struct list_head *)&page->zone_device_data,
+- &ns->free_area[i]);
++
++ /* In order to update ns->free */
++ __free_space(ns, bch_nvmpg_pgoff_to_offset(ns, pgoff_start), i);
+
+ pgoff_start += 1L << i;
+ pages -= 1L << i;
+@@ -490,6 +539,107 @@ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+ }
+ EXPORT_SYMBOL_GPL(bch_nvmpg_alloc_pages);
+
++static inline void *nvm_end_addr(struct bch_nvmpg_ns *ns)
++{
++ return ns->base_addr + (ns->pages_total << PAGE_SHIFT);
++}
++
++static inline bool in_nvmpg_ns_range(struct bch_nvmpg_ns *ns,
++ void *start_addr, void *end_addr)
++{
++ return (start_addr >= ns->base_addr) && (end_addr < nvm_end_addr(ns));
++}
++
++static int remove_nvmpg_rec(struct bch_nvmpg_recs *recs, int ns_id,
++ unsigned long nvmpg_offset, int order)
++{
++ struct bch_nvmpg_head *head;
++ struct bch_nvmpg_recs *prev_recs, *sys_recs;
++ struct bch_nvmpg_ns *ns;
++ unsigned long pgoff;
++ int i;
++
++ ns = global_nvmpg_set->ns_tbl[0];
++ pgoff = bch_nvmpg_offset_to_pgoff(nvmpg_offset);
++
++ head = bch_nvmpg_offset_to_ptr(recs->head_offset);
++ prev_recs = recs;
++ sys_recs = bch_nvmpg_offset_to_ptr(BCH_NVMPG_SYSRECS_OFFSET);
++ while (recs) {
++ for (i = 0; i < recs->size; i++) {
++ struct bch_nvmpg_rec *rec = &(recs->recs[i]);
++
++ if ((rec->pgoff == pgoff) && (rec->ns_id == ns_id)) {
++ WARN_ON(rec->order != order);
++ rec->_v = 0;
++ recs->used--;
++
++ if (recs->used == 0) {
++ int recs_pos = recs - sys_recs;
++
++ if (recs == prev_recs)
++ head->recs_offset[ns_id] =
++ recs->next_offset;
++ else
++ prev_recs->next_offset =
++ recs->next_offset;
++
++ recs->next_offset = 0;
++ recs->head_offset = 0;
++
++ bitmap_clear(ns->recs_bitmap, recs_pos, 1);
++ }
++ goto out;
++ }
++ }
++ prev_recs = recs;
++ recs = bch_nvmpg_offset_to_ptr(recs->next_offset);
++ }
++out:
++ return (recs ? 0 : -ENOENT);
++}
++
++void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order,
++ const char *uuid)
++{
++ struct bch_nvmpg_ns *ns;
++ struct bch_nvmpg_head *head;
++ struct bch_nvmpg_recs *recs;
++ int r;
++
++ mutex_lock(&global_nvmpg_set->lock);
++
++ ns = global_nvmpg_set->ns_tbl[BCH_NVMPG_GET_NS_ID(nvmpg_offset)];
++ if (!ns) {
++ pr_err("can't find namespace by given kaddr from namespace\n");
++ goto unlock;
++ }
++
++ head = find_nvmpg_head(uuid, false);
++ if (!head) {
++ pr_err("can't found bch_nvmpg_head by uuid\n");
++ goto unlock;
++ }
++
++ recs = find_nvmpg_recs(ns, head, false);
++ if (!recs) {
++ pr_err("can't find bch_nvmpg_recs by uuid\n");
++ goto unlock;
++ }
++
++ r = remove_nvmpg_rec(recs, ns->sb->this_ns, nvmpg_offset, order);
++ if (r < 0) {
++ pr_err("can't find bch_nvmpg_rec\n");
++ goto unlock;
++ }
++
++ __free_space(ns, nvmpg_offset, order);
++
++unlock:
++ mutex_unlock(&global_nvmpg_set->lock);
++}
++EXPORT_SYMBOL_GPL(bch_nvmpg_free_pages);
++
+ static int attach_nvmpg_set(struct bch_nvmpg_ns *ns)
+ {
+ struct bch_nvmpg_sb *sb = ns->sb;
+@@ -677,6 +827,7 @@ struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size)
+ ns->pages_offset = sb->pages_offset;
+ ns->pages_total = sb->pages_total;
+ ns->sb = sb;
++ /* increase by __free_space() */
+ ns->free = 0;
+ ns->bdev = bdev;
+ ns->set = global_nvmpg_set;
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index e52bb1f3f79e..009582b5771b 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -93,6 +93,7 @@ struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path, size_t size);
+ int bch_nvmpg_init(void);
+ void bch_nvmpg_exit(void);
+ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
++void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid);
+
+ #else
+
+@@ -113,6 +114,8 @@ static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+ return 0;
+ }
+
++static inline void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid) { }
++
+ #endif /* CONFIG_BCACHE_NVM_PAGES */
+
+ #endif /* _BCACHE_NVM_PAGES_H */
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0006-bcache-get-recs-list-head-for-allocated-pages-by-specific-uuid.patch b/for-test/nvdimm-support/meta-dev-20230303/0006-bcache-get-recs-list-head-for-allocated-pages-by-specific-uuid.patch
new file mode 100644
index 0000000..fb312f3
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0006-bcache-get-recs-list-head-for-allocated-pages-by-specific-uuid.patch
@@ -0,0 +1,67 @@
+From 5f3c1d461205e4baebfa952650827477d4d988cd Mon Sep 17 00:00:00 2001
+From: Jianpeng Ma <jianpeng.ma@intel.com>
+Date: Thu, 21 Oct 2021 21:06:03 +0800
+Subject: [PATCH 06/16] bcache: get recs list head for allocated pages by
+ specific uuid
+
+This patch implements bch_get_nvmpg_head() of the buddy allocator
+to be used to get recs list head for allocated pages by specific
+uuid. Then the requester (owner) can find all previous allocated
+nvdimm pages by iterating the recs list.
+
+Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
+Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+---
+ drivers/md/bcache/nvmpg.c | 6 ++++++
+ drivers/md/bcache/nvmpg.h | 6 ++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index 129938603bc7..87e2f5c3f734 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -539,6 +539,12 @@ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+ }
+ EXPORT_SYMBOL_GPL(bch_nvmpg_alloc_pages);
+
++struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid)
++{
++ return find_nvmpg_head(uuid, false);
++}
++EXPORT_SYMBOL_GPL(bch_get_nvmpg_head);
++
+ static inline void *nvm_end_addr(struct bch_nvmpg_ns *ns)
+ {
+ return ns->base_addr + (ns->pages_total << PAGE_SHIFT);
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index 009582b5771b..a2621c201fa6 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -94,6 +94,7 @@ int bch_nvmpg_init(void);
+ void bch_nvmpg_exit(void);
+ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
+ void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid);
++struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid);
+
+ #else
+
+@@ -116,6 +117,11 @@ static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+
+ static inline void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid) { }
+
++static inline struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid)
++{
++ return NULL;
++}
++
+ #endif /* CONFIG_BCACHE_NVM_PAGES */
+
+ #endif /* _BCACHE_NVM_PAGES_H */
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0007-bcache-use-bucket-index-to-set-GC_MARK_METADATA-for-journal-buckets-in-bch_btree_gc_finish.patch b/for-test/nvdimm-support/meta-dev-20230303/0007-bcache-use-bucket-index-to-set-GC_MARK_METADATA-for-journal-buckets-in-bch_btree_gc_finish.patch
new file mode 100644
index 0000000..366f0a7
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0007-bcache-use-bucket-index-to-set-GC_MARK_METADATA-for-journal-buckets-in-bch_btree_gc_finish.patch
@@ -0,0 +1,48 @@
+From 3e136dd5ecb2ce99a567f81504bf8e85501fab5b Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Fri, 25 Jun 2021 00:17:02 +0800
+Subject: [PATCH 07/16] bcache: use bucket index to set GC_MARK_METADATA for
+ journal buckets in bch_btree_gc_finish()
+
+Currently the meta data bucket locations on cache device are reserved
+after the meta data stored on NVDIMM pages, for the meta data layout
+consistency temporarily. So these buckets are still marked as meta data
+by SET_GC_MARK() in bch_btree_gc_finish().
+
+When BCH_FEATURE_INCOMPAT_NVDIMM_META is set, the sb.d[] stores linear
+address of NVDIMM pages and not bucket index anymore. Therefore we
+should avoid finding the bucket index from sb.d[], and directly use bucket
+indexes from ca->sb.first_bucket to (ca->sb.first_bucket +
+ca->sb.njournal_buckets) for setting the gc mark of the journal buckets.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/btree.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index e136d6edc1ed..6a90c33109c7 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -1761,8 +1761,10 @@ static void bch_btree_gc_finish(struct cache_set *c)
+ ca = c->cache;
+ ca->invalidate_needs_gc = 0;
+
+- for (k = ca->sb.d; k < ca->sb.d + ca->sb.keys; k++)
+- SET_GC_MARK(ca->buckets + *k, GC_MARK_METADATA);
++ /* Range [first_bucket, first_bucket + keys) is for journal buckets */
++ for (i = ca->sb.first_bucket;
++ i < ca->sb.first_bucket + ca->sb.njournal_buckets; i++)
++ SET_GC_MARK(ca->buckets + i, GC_MARK_METADATA);
+
+ for (k = ca->prio_buckets;
+ k < ca->prio_buckets + prio_buckets(ca) * 2; k++)
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0008-bcache-add-bch_nvmpg_flush-to-flush-LLC-of-NVDIMM-pages.patch b/for-test/nvdimm-support/meta-dev-20230303/0008-bcache-add-bch_nvmpg_flush-to-flush-LLC-of-NVDIMM-pages.patch
new file mode 100644
index 0000000..03b1087
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0008-bcache-add-bch_nvmpg_flush-to-flush-LLC-of-NVDIMM-pages.patch
@@ -0,0 +1,64 @@
+From 4fbfdf76868bffede3dde3a11fc28fae50a5f314 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 4 Jul 2022 15:14:13 +0800
+Subject: [PATCH 08/16] bcache: add bch_nvmpg_flush() to flush LLC of NVDIMM
+ pages
+
+bch_nvmpg_flush() is added to flush last level cache for all dirty
+NVDIMM pages from the nvmpg allocator. It will be used in future patches
+when last level cache flushing is necessary.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/nvmpg.c | 14 ++++++++++++++
+ drivers/md/bcache/nvmpg.h | 2 ++
+ 2 files changed, 16 insertions(+)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index 87e2f5c3f734..aaa7f2ff70ab 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -299,6 +299,20 @@ static void __free_space(struct bch_nvmpg_ns *ns, unsigned long nvmpg_offset,
+ ns->free += add_pages;
+ }
+
++void bch_nvmpg_flush(void)
++{
++ int i;
++
++ for (i = 0; i < BCH_NVMPG_NS_MAX; i++) {
++ struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[i];
++
++ if (ns)
++ arch_invalidate_pmem(ns->base_addr,
++ ns->pages_total << PAGE_SHIFT);
++ }
++}
++
++
+ static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
+ {
+ unsigned int start, end, pages;
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index a2621c201fa6..984c25cdf3d2 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -94,6 +94,7 @@ int bch_nvmpg_init(void);
+ void bch_nvmpg_exit(void);
+ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
+ void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid);
++void bch_nvmpg_flush(void);
+ struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid);
+
+ #else
+@@ -116,6 +117,7 @@ static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+ }
+
+ static inline void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid) { }
++static inline void bch_nvmpg_flush(void) {}
+
+ static inline struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid)
+ {
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0009-bcache-add-BCH_FEATURE_INCOMPAT_NVDIMM_META-into-incompat-feature-set.patch b/for-test/nvdimm-support/meta-dev-20230303/0009-bcache-add-BCH_FEATURE_INCOMPAT_NVDIMM_META-into-incompat-feature-set.patch
new file mode 100644
index 0000000..e5aeaee
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0009-bcache-add-BCH_FEATURE_INCOMPAT_NVDIMM_META-into-incompat-feature-set.patch
@@ -0,0 +1,60 @@
+From 7a123f9624f90472d4fdd703838a7865ccd649ca Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Fri, 25 Jun 2021 00:18:31 +0800
+Subject: [PATCH 09/16] bcache: add BCH_FEATURE_INCOMPAT_NVDIMM_META into
+ incompat feature set
+
+This patch adds BCH_FEATURE_INCOMPAT_NVDIMM_META (value 0x0004) into the
+incompat feature set. When this bit is set by bcache-tools, it indicates
+bcache meta data should be stored on specific NVDIMM meta device.
+
+The bcache meta data mainly includes journal and btree nodes, when this
+bit is set in incompat feature set, bcache will ask the nvm-pages
+allocator for NVDIMM space to store the meta data.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/features.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
+index 09161b89c63e..fab92678be76 100644
+--- a/drivers/md/bcache/features.h
++++ b/drivers/md/bcache/features.h
+@@ -18,11 +18,19 @@
+ #define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001
+ /* real bucket size is (1 << bucket_size) */
+ #define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002
++/* store bcache meta data on nvdimm */
++#define BCH_FEATURE_INCOMPAT_NVDIMM_META 0x0004
+
+ #define BCH_FEATURE_COMPAT_SUPP 0
+ #define BCH_FEATURE_RO_COMPAT_SUPP 0
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
++ BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE| \
++ BCH_FEATURE_INCOMPAT_NVDIMM_META)
++#else
+ #define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
+ BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE)
++#endif
+
+ #define BCH_HAS_COMPAT_FEATURE(sb, mask) \
+ ((sb)->feature_compat & (mask))
+@@ -90,6 +98,7 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
+
+ BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET);
+ BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE);
++BCH_FEATURE_INCOMPAT_FUNCS(nvdimm_meta, NVDIMM_META);
+
+ static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
+ {
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0010-bcache-initialize-bcache-journal-for-NVDIMM-meta-device.patch b/for-test/nvdimm-support/meta-dev-20230303/0010-bcache-initialize-bcache-journal-for-NVDIMM-meta-device.patch
new file mode 100644
index 0000000..0ddf1e5
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0010-bcache-initialize-bcache-journal-for-NVDIMM-meta-device.patch
@@ -0,0 +1,257 @@
+From 06b8897f74cbf6cff0565c9fd85f76985267f5ff Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 4 Jul 2022 15:17:51 +0800
+Subject: [PATCH 10/16] bcache: initialize bcache journal for NVDIMM meta
+ device
+
+The nvm-pages allocator may store and index the NVDIMM pages allocated
+for bcache journal. This patch adds the initialization to store bcache
+journal space on NVDIMM pages if BCH_FEATURE_INCOMPAT_NVDIMM_META bit is
+set by bcache-tools.
+
+If BCH_FEATURE_INCOMPAT_NVDIMM_META is set, get_nvdimm_journal_space()
+will return the nvmpg_offset of NVDIMM pages for bcache journal,
+- If there is previously allocated space, find it from nvm-pages owner
+ list and return to bch_journal_init().
+- If there is no previously allocated space, require a new NVDIMM range
+ from the nvm-pages allocator, and return it to bch_journal_init().
+
+And in bch_journal_init(), keys in sb.d[] store the corresponding nvmpg
+offset from NVDIMM into sb.d[i].ptr[0] where 'i' is the bucket index to
+iterate all journal buckets.
+
+Later when bcache journaling code stores the journaling jset, the target
+NVDIMM nvmpg offset stored (and updated) in sb.d[i].ptr[0] can be used
+to calculate the linear address in memory copy from DRAM pages into
+NVDIMM pages.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/journal.c | 115 ++++++++++++++++++++++++++++++++++++
+ drivers/md/bcache/journal.h | 2 +-
+ drivers/md/bcache/nvmpg.c | 9 +++
+ drivers/md/bcache/nvmpg.h | 1 +
+ drivers/md/bcache/super.c | 18 +++---
+ 5 files changed, 134 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index e5da469a4235..1040692c5cc7 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -9,6 +9,8 @@
+ #include "btree.h"
+ #include "debug.h"
+ #include "extents.h"
++#include "nvmpg.h"
++#include "features.h"
+
+ #include <trace/events/bcache.h>
+
+@@ -997,3 +999,116 @@ int bch_journal_alloc(struct cache_set *c)
+
+ return 0;
+ }
++
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++
++static unsigned long find_journal_nvmpg_base(struct bch_nvmpg_head *nvmpg_head,
++ struct cache *ca)
++{
++ unsigned long jnl_offset, jnl_pgoff, jnl_ns_id;
++ unsigned long ret_offset = 0;
++ int i;
++
++ jnl_offset = (unsigned long)ca->sb.d[0];
++ jnl_ns_id = BCH_NVMPG_GET_NS_ID(jnl_offset);
++ jnl_pgoff = BCH_NVMPG_GET_OFFSET(jnl_offset) >> PAGE_SHIFT;
++
++ for (i = 0; i < BCH_NVMPG_NS_MAX; i++) {
++ struct bch_nvmpg_recs *recs;
++ struct bch_nvmpg_rec *rec;
++ unsigned long recs_offset = 0;
++ int j;
++
++ recs_offset = nvmpg_head->recs_offset[i];
++ recs = bch_nvmpg_offset_to_ptr(recs_offset);
++ while (recs) {
++ for (j = 0; j < recs->size; j++) {
++ rec = &recs->recs[j];
++ if ((rec->pgoff != jnl_pgoff) ||
++ (rec->ns_id != jnl_ns_id))
++ continue;
++
++ ret_offset = jnl_offset;
++ goto out;
++ }
++ recs_offset = recs->next_offset;
++ recs = bch_nvmpg_offset_to_ptr(recs_offset);
++ }
++ }
++
++out:
++ return ret_offset;
++}
++
++static unsigned long get_journal_nvmpg_space(struct cache *ca)
++{
++ struct bch_nvmpg_head *head = NULL;
++ unsigned long nvmpg_offset;
++ int order;
++
++ head = bch_get_nvmpg_head(ca->sb.set_uuid);
++ if (head) {
++ nvmpg_offset = find_journal_nvmpg_base(head, ca);
++ if (nvmpg_offset)
++ goto found;
++ }
++
++ order = ilog2((ca->sb.bucket_size *
++ ca->sb.njournal_buckets) / PAGE_SECTORS);
++ nvmpg_offset = bch_nvmpg_alloc_pages(order, ca->sb.set_uuid);
++ if (nvmpg_offset)
++ memset(bch_nvmpg_offset_to_ptr(nvmpg_offset),
++ 0, (1 << order) * PAGE_SIZE);
++found:
++ return nvmpg_offset;
++}
++
++#endif /* CONFIG_BCACHE_NVM_PAGES */
++
++static int __bch_journal_nvdimm_init(struct cache *ca)
++{
++ int ret = -1;
++
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++ int i;
++ unsigned long jnl_base = 0;
++
++ jnl_base = get_journal_nvmpg_space(ca);
++ if (!jnl_base) {
++ pr_err("Failed to get journal space from nvdimm\n");
++ goto out;
++ }
++
++ /* Initialized and reloaded from on-disk super block already */
++ if (ca->sb.d[0] != 0) {
++ ret = 0;
++ goto out;
++ }
++
++ for (i = 0; i < ca->sb.keys; i++)
++ ca->sb.d[i] = jnl_base + (bucket_bytes(ca) * i);
++
++ ret = 0;
++out:
++#endif /* CONFIG_BCACHE_NVM_PAGES */
++
++ return ret;
++}
++
++
++int bch_journal_init(struct cache_set *c)
++{
++ int i, ret = 0;
++ struct cache *ca = c->cache;
++
++ ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
++ 2, SB_JOURNAL_BUCKETS);
++
++ if (!bch_has_feature_nvdimm_meta(&ca->sb)) {
++ for (i = 0; i < ca->sb.keys; i++)
++ ca->sb.d[i] = ca->sb.first_bucket + i;
++ } else
++ ret = __bch_journal_nvdimm_init(ca);
++
++ return ret;
++}
+diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
+index cd316b4a1e95..3387659c5ede 100644
+--- a/drivers/md/bcache/journal.h
++++ b/drivers/md/bcache/journal.h
+@@ -180,7 +180,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list);
+ void bch_journal_meta(struct cache_set *c, struct closure *cl);
+ int bch_journal_read(struct cache_set *c, struct list_head *list);
+ int bch_journal_replay(struct cache_set *c, struct list_head *list);
+-
++int bch_journal_init(struct cache_set *c);
+ void bch_journal_free(struct cache_set *c);
+ int bch_journal_alloc(struct cache_set *c);
+ void bch_journal_space_reserve(struct journal *j);
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index aaa7f2ff70ab..642d2e21c565 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -24,6 +24,15 @@
+
+ struct bch_nvmpg_set *global_nvmpg_set;
+
++struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id)
++{
++ if ((ns_id >= 0) && (ns_id < BCH_NVMPG_NS_MAX))
++ return global_nvmpg_set->ns_tbl[ns_id];
++
++ pr_emerg("Invalid ns_id: %d\n", ns_id);
++ return NULL;
++}
++
+ void *bch_nvmpg_offset_to_ptr(unsigned long offset)
+ {
+ int ns_id;
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index 984c25cdf3d2..a8a36a6caebb 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -96,6 +96,7 @@ unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
+ void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid);
+ void bch_nvmpg_flush(void);
+ struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid);
++struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id);
+
+ #else
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index ffe79871aa69..eb04e8a4d6e7 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -146,9 +146,11 @@ static const char *read_super_common(struct cache_sb *sb, struct block_device *
+ goto err;
+
+ err = "Journal buckets not sequential";
+- for (i = 0; i < sb->keys; i++)
+- if (sb->d[i] != sb->first_bucket + i)
+- goto err;
++ if (!bch_has_feature_nvdimm_meta(sb)) {
++ for (i = 0; i < sb->keys; i++)
++ if (sb->d[i] != sb->first_bucket + i)
++ goto err;
++ }
+
+ err = "Too many journal buckets";
+ if (sb->first_bucket + sb->keys > sb->nbuckets)
+@@ -2065,14 +2067,10 @@ static int run_cache_set(struct cache_set *c)
+ if (bch_journal_replay(c, &journal))
+ goto err;
+ } else {
+- unsigned int j;
+-
+ pr_notice("invalidating existing data\n");
+- ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
+- 2, SB_JOURNAL_BUCKETS);
+-
+- for (j = 0; j < ca->sb.keys; j++)
+- ca->sb.d[j] = ca->sb.first_bucket + j;
++ err = "error initializing journal";
++ if (bch_journal_init(c))
++ goto err;
+
+ bch_initial_gc_finish(c);
+
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0011-bcache-support-storing-bcache-journal-into-NVDIMM-meta-device.patch b/for-test/nvdimm-support/meta-dev-20230303/0011-bcache-support-storing-bcache-journal-into-NVDIMM-meta-device.patch
new file mode 100644
index 0000000..610da85
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0011-bcache-support-storing-bcache-journal-into-NVDIMM-meta-device.patch
@@ -0,0 +1,232 @@
+From 03b3d8c0e44a1dccecfdf13d2803070afd387516 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 7 Jun 2022 12:07:23 +0800
+Subject: [PATCH 11/16] bcache: support storing bcache journal into NVDIMM meta
+ device
+
+This patch implements two methods to store bcache journal to,
+1) __journal_write_unlocked() for block interface device
+ The legacy method to compose bio and issue the jset bio to cache
+ device (e.g. SSD). c->journal.key.ptr[0] indicates the LBA on cache
+ device to store the journal jset.
+2) __journal_nvdimm_write_unlocked() for memory interface NVDIMM
+ Use memory interface to access NVDIMM pages and store the jset with
+ memcpy() and bch_nvmpg_flush(). c->journal.key.ptr[0] indicates the
+ linear address from the NVDIMM pages to store the journal jset.
+
+For legacy configuration without NVDIMM meta device, journal I/O is
+handled by __journal_write_unlocked() with existing code logic. If the
+NVDIMM meta device is used (by bcache-tools), the journal I/O will
+be handled by __journal_nvdimm_write_unlocked() and go into the NVDIMM
+pages.
+
+And when NVDIMM meta device is used, sb.d[] stores the linear addresses
+from NVDIMM pages (no more bucket index), in journal_reclaim() the
+journaling location in c->journal.key.ptr[0] should also be updated by
+linear address from NVDIMM pages (no more LBA combined by sectors offset
+and bucket index).
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/journal.c | 121 +++++++++++++++++++++++++-----------
+ drivers/md/bcache/super.c | 3 +-
+ 2 files changed, 86 insertions(+), 38 deletions(-)
+
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 1040692c5cc7..9c325be17830 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -599,6 +599,8 @@ static void do_journal_discard(struct cache *ca)
+ return;
+ }
+
++ BUG_ON(bch_has_feature_nvdimm_meta(&ca->sb));
++
+ switch (atomic_read(&ja->discard_in_flight)) {
+ case DISCARD_IN_FLIGHT:
+ return;
+@@ -678,9 +680,16 @@ static void journal_reclaim(struct cache_set *c)
+ goto out;
+
+ ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+- k->ptr[0] = MAKE_PTR(0,
+- bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
+- ca->sb.nr_this_dev);
++
++ if (!bch_has_feature_nvdimm_meta(&ca->sb))
++ k->ptr[0] = MAKE_PTR(0,
++ bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
++ ca->sb.nr_this_dev);
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++ else
++ k->ptr[0] = (unsigned long)bch_nvmpg_offset_to_ptr(
++ ca->sb.d[ja->cur_idx]);
++#endif
+ atomic_long_inc(&c->reclaimed_journal_buckets);
+
+ bkey_init(k);
+@@ -746,46 +755,21 @@ static void journal_write_unlock(struct closure *cl)
+ spin_unlock(&c->journal.lock);
+ }
+
+-static void journal_write_unlocked(struct closure *cl)
++
++static void __journal_write_unlocked(struct cache_set *c)
+ __releases(c->journal.lock)
+ {
+- struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+- struct cache *ca = c->cache;
+- struct journal_write *w = c->journal.cur;
+ struct bkey *k = &c->journal.key;
+- unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
+- ca->sb.block_size;
+-
++ struct journal_write *w = c->journal.cur;
++ struct closure *cl = &c->journal.io;
++ struct cache *ca = c->cache;
+ struct bio *bio;
+ struct bio_list list;
++ unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
++ ca->sb.block_size;
+
+ bio_list_init(&list);
+
+- if (!w->need_write) {
+- closure_return_with_destructor(cl, journal_write_unlock);
+- return;
+- } else if (journal_full(&c->journal)) {
+- journal_reclaim(c);
+- spin_unlock(&c->journal.lock);
+-
+- btree_flush_write(c);
+- continue_at(cl, journal_write, bch_journal_wq);
+- return;
+- }
+-
+- c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
+-
+- w->data->btree_level = c->root->level;
+-
+- bkey_copy(&w->data->btree_root, &c->root->key);
+- bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
+-
+- w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
+- w->data->magic = jset_magic(&ca->sb);
+- w->data->version = BCACHE_JSET_VERSION;
+- w->data->last_seq = last_seq(&c->journal);
+- w->data->csum = csum_set(w->data);
+-
+ for (i = 0; i < KEY_PTRS(k); i++) {
+ ca = c->cache;
+ bio = &ca->journal.bio;
+@@ -808,7 +792,6 @@ static void journal_write_unlocked(struct closure *cl)
+
+ ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
+ }
+-
+ /* If KEY_PTRS(k) == 0, this jset gets lost in air */
+ BUG_ON(i == 0);
+
+@@ -820,6 +803,72 @@ static void journal_write_unlocked(struct closure *cl)
+
+ while ((bio = bio_list_pop(&list)))
+ closure_bio_submit(c, bio, cl);
++}
++
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++
++static void __journal_nvdimm_write_unlocked(struct cache_set *c)
++ __releases(c->journal.lock)
++{
++ struct journal_write *w = c->journal.cur;
++ struct cache *ca = c->cache;
++ unsigned int sectors;
++
++ sectors = set_blocks(w->data, block_bytes(ca)) * ca->sb.block_size;
++ atomic_long_add(sectors, &ca->meta_sectors_written);
++
++ memcpy((void *)c->journal.key.ptr[0], w->data, sectors << 9);
++ bch_nvmpg_flush();
++
++ c->journal.key.ptr[0] += sectors << 9;
++ ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
++
++ atomic_dec_bug(&fifo_back(&c->journal.pin));
++ bch_journal_next(&c->journal);
++ journal_reclaim(c);
++
++ spin_unlock(&c->journal.lock);
++}
++
++#endif /* CONFIG_BCACHE_NVM_PAGES */
++
++static void journal_write_unlocked(struct closure *cl)
++{
++ struct cache_set *c = container_of(cl, struct cache_set, journal.io);
++ struct cache *ca = c->cache;
++ struct journal_write *w = c->journal.cur;
++
++ if (!w->need_write) {
++ closure_return_with_destructor(cl, journal_write_unlock);
++ return;
++ } else if (journal_full(&c->journal)) {
++ journal_reclaim(c);
++ spin_unlock(&c->journal.lock);
++
++ btree_flush_write(c);
++ continue_at(cl, journal_write, bch_journal_wq);
++ return;
++ }
++
++ c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
++
++ w->data->btree_level = c->root->level;
++
++ bkey_copy(&w->data->btree_root, &c->root->key);
++ bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
++
++ w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
++ w->data->magic = jset_magic(&ca->sb);
++ w->data->version = BCACHE_JSET_VERSION;
++ w->data->last_seq = last_seq(&c->journal);
++ w->data->csum = csum_set(w->data);
++
++ if (!bch_has_feature_nvdimm_meta(&ca->sb))
++ __journal_write_unlocked(c);
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++ else
++ __journal_nvdimm_write_unlocked(c);
++#endif
+
+ continue_at(cl, journal_write_done, NULL);
+ }
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index eb04e8a4d6e7..7581c3eaf34e 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -1676,7 +1676,7 @@ void bch_cache_set_release(struct kobject *kobj)
+ static void cache_set_free(struct closure *cl)
+ {
+ struct cache_set *c = container_of(cl, struct cache_set, cl);
+- struct cache *ca;
++ struct cache *ca = c->cache;
+
+ debugfs_remove(c->debug);
+
+@@ -1688,7 +1688,6 @@ static void cache_set_free(struct closure *cl)
+ bch_bset_sort_state_free(&c->sort);
+ free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb)));
+
+- ca = c->cache;
+ if (ca) {
+ ca->set = NULL;
+ c->cache = NULL;
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0012-bcache-read-jset-from-NVDIMM-pages-for-journal-replay.patch b/for-test/nvdimm-support/meta-dev-20230303/0012-bcache-read-jset-from-NVDIMM-pages-for-journal-replay.patch
new file mode 100644
index 0000000..9adc56a
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0012-bcache-read-jset-from-NVDIMM-pages-for-journal-replay.patch
@@ -0,0 +1,177 @@
+From 97fbf32af7a3a3da1fa05a70df380f173b553725 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 7 Jun 2022 12:22:29 +0800
+Subject: [PATCH 12/16] bcache: read jset from NVDIMM pages for journal replay
+
+This patch implements two methods to read jset from media for journal
+replay,
+- __jnl_rd_bkt() for block device
+ This is the legacy method to read jset via block device interface.
+- __jnl_rd_nvm_bkt() for NVDIMM
+ This is the method to read jset from NVDIMM memory interface, a.k.a
+ memcpy() from NVDIMM pages to DRAM pages.
+
+If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in incompat feature set,
+during running cache set, journal_read_bucket() will read the journal
+content from NVDIMM by __jnl_rd_nvm_bkt(). The linear addresses of
+NVDIMM pages to read jset are stored in sb.d[SB_JOURNAL_BUCKETS], which
+were initialized and maintained in previous runs of the cache set.
+
+One thing that should be noticed is, when bch_journal_read() is called, the
+linear address of NVDIMM pages is not loaded and initialized yet, it
+is necessary to call __bch_journal_nvdimm_init() before reading the jset
+from NVDIMM pages.
+
+The code comments added in journal_read_bucket() is noticed by kernel
+test robot and Dan Carpenter, it explains why it is safe to only check
+!bch_has_feature_nvdimm_meta() condition in the if() statement when
+CONFIG_BCACHE_NVM_PAGES is not configured. To avoid confusion from the
+bogus warning message from static checking tool.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/journal.c | 84 ++++++++++++++++++++++++++++++-------
+ 1 file changed, 69 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 9c325be17830..24615df1f4e6 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -34,18 +34,58 @@ static void journal_read_endio(struct bio *bio)
+ closure_put(cl);
+ }
+
++static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx,
++ unsigned int len, unsigned int offset,
++ struct closure *cl)
++{
++ sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]);
++ struct bio *bio = &ca->journal.bio;
++ struct jset *data = ca->set->journal.w[0].data;
++
++ bio_reset(bio, ca->bdev, REQ_OP_READ);
++ bio->bi_iter.bi_sector = bucket + offset;
++ bio->bi_iter.bi_size = len << 9;
++
++ bio->bi_end_io = journal_read_endio;
++ bio->bi_private = cl;
++ bch_bio_map(bio, data);
++
++ closure_bio_submit(ca->set, bio, cl);
++ closure_sync(cl);
++
++ /* Indeed journal.w[0].data */
++ return data;
++}
++
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++
++static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx,
++ unsigned int len, unsigned int offset)
++{
++ void *jset_addr;
++ struct jset *data;
++
++ jset_addr = bch_nvmpg_offset_to_ptr(ca->sb.d[bkt_idx]) + (offset << 9);
++ data = ca->set->journal.w[0].data;
++
++ memcpy(data, jset_addr, len << 9);
++
++ /* Indeed journal.w[0].data */
++ return data;
++}
++
++#endif /* CONFIG_BCACHE_NVM_PAGES */
++
+ static int journal_read_bucket(struct cache *ca, struct list_head *list,
+ unsigned int bucket_index)
+ {
+ struct journal_device *ja = &ca->journal;
+- struct bio *bio = &ja->bio;
+
+ struct journal_replay *i;
+- struct jset *j, *data = ca->set->journal.w[0].data;
++ struct jset *j;
+ struct closure cl;
+ unsigned int len, left, offset = 0;
+ int ret = 0;
+- sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
+
+ closure_init_stack(&cl);
+
+@@ -55,24 +95,27 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
+ reread: left = ca->sb.bucket_size - offset;
+ len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
+
+- bio_reset(bio, ca->bdev, REQ_OP_READ);
+- bio->bi_iter.bi_sector = bucket + offset;
+- bio->bi_iter.bi_size = len << 9;
+-
+- bio->bi_end_io = journal_read_endio;
+- bio->bi_private = &cl;
+- bch_bio_map(bio, data);
+-
+- closure_bio_submit(ca->set, bio, &cl);
+- closure_sync(&cl);
++ if (!bch_has_feature_nvdimm_meta(&ca->sb))
++ j = __jnl_rd_bkt(ca, bucket_index, len, offset, &cl);
++ /*
++ * If CONFIG_BCACHE_NVM_PAGES is not defined, the feature bit
++ * BCH_FEATURE_INCOMPAT_NVDIMM_META won't be in incompatible
++ * support feature set, a cache device format with feature bit
++ * BCH_FEATURE_INCOMPAT_NVDIMM_META will fail much earlier in
++ * read_super() by bch_has_unknown_incompat_features().
++ * Therefore when CONFIG_BCACHE_NVM_PAGES is not defined, it is
++ * safe to ignore the bch_has_feature_nvdimm_meta() condition.
++ */
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++ else
++ j = __jnl_rd_nvm_bkt(ca, bucket_index, len, offset);
++#endif
+
+ /* This function could be simpler now since we no longer write
+ * journal entries that overlap bucket boundaries; this means
+ * the start of a bucket will always have a valid journal entry
+ * if it has any journal entries at all.
+ */
+-
+- j = data;
+ while (len) {
+ struct list_head *where;
+ size_t blocks, bytes = set_bytes(j);
+@@ -168,6 +211,8 @@ reread: left = ca->sb.bucket_size - offset;
+ return ret;
+ }
+
++static int __bch_journal_nvdimm_init(struct cache *ca);
++
+ int bch_journal_read(struct cache_set *c, struct list_head *list)
+ {
+ #define read_bucket(b) \
+@@ -186,6 +231,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
+ unsigned int i, l, r, m;
+ uint64_t seq;
+
++ /*
++ * Linear addresses of NVDIMM pages for journaling are not
++ * initialized yet, do it before reading jset from NVDIMM pages.
++ */
++ if (bch_has_feature_nvdimm_meta(&ca->sb)) {
++ if (__bch_journal_nvdimm_init(ca) < 0)
++ return -ENXIO;
++ }
++
+ bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
+ pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
+
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0013-bcache-add-sysfs-interface-register_nvdimm_meta-to-register-NVDIMM-meta-device.patch b/for-test/nvdimm-support/meta-dev-20230303/0013-bcache-add-sysfs-interface-register_nvdimm_meta-to-register-NVDIMM-meta-device.patch
new file mode 100644
index 0000000..3c23ad5
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0013-bcache-add-sysfs-interface-register_nvdimm_meta-to-register-NVDIMM-meta-device.patch
@@ -0,0 +1,84 @@
+From f5e95ef2ef8d076e95a673ce4249503ea46a5d42 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Sat, 24 Jul 2021 00:55:25 +0800
+Subject: [PATCH 13/16] bcache: add sysfs interface register_nvdimm_meta to
+ register NVDIMM meta device
+
+This patch adds a sysfs interface register_nvdimm_meta to register
+NVDIMM meta device. The sysfs interface file only shows up when
+CONFIG_BCACHE_NVM_PAGES=y. Then a NVDIMM name space formatted by
+bcache-tools can be registered into bcache by e.g.,
+ echo /dev/pmem0 > /sys/fs/bcache/register_nvdimm_meta
+
+Signed-off-by: Coly Li <colyli@suse.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jianpeng Ma <jianpeng.ma@intel.com>
+Cc: Qiaowei Ren <qiaowei.ren@intel.com>
+---
+ drivers/md/bcache/super.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
+index 7581c3eaf34e..0043b0675df8 100644
+--- a/drivers/md/bcache/super.c
++++ b/drivers/md/bcache/super.c
+@@ -2403,10 +2403,18 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
+ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
+ struct kobj_attribute *attr,
+ const char *buffer, size_t size);
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++static ssize_t register_nvdimm_meta(struct kobject *k,
++ struct kobj_attribute *attr,
++ const char *buffer, size_t size);
++#endif
+
+ kobj_attribute_write(register, register_bcache);
+ kobj_attribute_write(register_quiet, register_bcache);
+ kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++kobj_attribute_write(register_nvdimm_meta, register_nvdimm_meta);
++#endif
+
+ static bool bch_is_open_backing(dev_t dev)
+ {
+@@ -2520,6 +2528,24 @@ static void register_device_async(struct async_reg_args *args)
+ queue_delayed_work(system_wq, &args->reg_work, 10);
+ }
+
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++static ssize_t register_nvdimm_meta(struct kobject *k, struct kobj_attribute *attr,
++ const char *buffer, size_t size)
++{
++ ssize_t ret = size;
++
++ struct bch_nvmpg_ns *ns = bch_register_namespace(buffer, size);
++
++ if (IS_ERR(ns)) {
++ pr_err("register nvdimm namespace %s for meta device failed.\n",
++ buffer);
++ ret = -EINVAL;
++ }
++
++ return ret;
++}
++#endif
++
+ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
+ const char *buffer, size_t size)
+ {
+@@ -2862,6 +2888,9 @@ static int __init bcache_init(void)
+ static const struct attribute *files[] = {
+ &ksysfs_register.attr,
+ &ksysfs_register_quiet.attr,
++#if defined(CONFIG_BCACHE_NVM_PAGES)
++ &ksysfs_register_nvdimm_meta.attr,
++#endif
+ &ksysfs_pendings_cleanup.attr,
+ NULL
+ };
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0014-bcache-add-helper-routines-to-convert-bkey-and-nvmpg-offset.patch b/for-test/nvdimm-support/meta-dev-20230303/0014-bcache-add-helper-routines-to-convert-bkey-and-nvmpg-offset.patch
new file mode 100644
index 0000000..5529725
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0014-bcache-add-helper-routines-to-convert-bkey-and-nvmpg-offset.patch
@@ -0,0 +1,169 @@
+From 0b109e7d981a92628a89fe01706fef89196abddd Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 4 Jul 2022 21:04:09 +0800
+Subject: [PATCH 14/16] bcache: add helper routines to convert bkey and nvmpg
+ offset
+
+This patch adds the following routines to convert between bkey format
+offset and nvmpg format offset, and to convert a bkey format offset to a
+linear address on nvmpg pages:
+- bug_on_bkey_offset_limit()
+ If the bkey format offset is too large (should not happen), call BUG()
+ to avoid further chaos.
+- bkey_offset_to_nvmpg_ns_id()
+ Extract the nvmpg namespace id from bkey format offset value.
+- bkey_offset_to_nvmpg_offset()
+ Convert bkey format offset to nvmpg format offset.
+- nvmpg_offset_to_bkey_offset()
+ Convert nvmpg format offset to bkey format offset.
+- bkey_offset_to_nvmpg_ptr()
+ Convert bkey format offset to a linear address on nvmpg pages.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/nvmpg.c | 48 +++++++++++++++++++++++++++++++++++++
+ drivers/md/bcache/nvmpg.h | 50 +++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 96 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
+index 642d2e21c565..da1045c0e10c 100644
+--- a/drivers/md/bcache/nvmpg.c
++++ b/drivers/md/bcache/nvmpg.c
+@@ -91,6 +91,54 @@ static void *bch_nvmpg_rec_to_ptr(struct bch_nvmpg_rec *r)
+ return bch_nvmpg_pgoff_to_ptr(ns, pgoff);
+ }
+
++static void bug_on_bkey_offset_limit(unsigned long sector)
++{
++ if (sector >= ((1UL << BCH_BKEY_OFFSET_BITS) - 1)) {
++ pr_err("Invalid NVDIMM offset: too large as 0x%lx\n", sector);
++ pr_err("Such condition should never happen. Panic.\n");
++ BUG();
++ }
++}
++
++int bkey_offset_to_nvmpg_ns_id(unsigned long bkey_offset)
++{
++ return (bkey_offset >> BCH_BKEY_OFFSET_BITS) &
++ BCH_BKEY_OFFSET_NS_ID_MASK;
++}
++
++unsigned long bkey_offset_to_nvmpg_offset(unsigned long bkey_offset)
++{
++ int ns_id;
++ unsigned long offset;
++
++ ns_id = (bkey_offset >> BCH_BKEY_OFFSET_BITS) &
++ BCH_BKEY_OFFSET_NS_ID_MASK;
++
++ offset = (bkey_offset & BCH_BKEY_OFFSET_MASK) << 9;
++ return BCH_NVMPG_OFFSET(ns_id, offset);
++}
++
++unsigned long nvmpg_offset_to_bkey_offset(unsigned long nvmpg_offset)
++{
++ int ns_id;
++ unsigned long sector;
++
++ ns_id = BCH_NVMPG_GET_NS_ID(nvmpg_offset);
++ sector = BCH_NVMPG_GET_OFFSET(nvmpg_offset) >> 9;
++ bug_on_bkey_offset_limit(sector);
++
++ return ((sector & BCH_BKEY_OFFSET_MASK) |
++ ((ns_id & BCH_BKEY_OFFSET_NS_ID_MASK) << BCH_BKEY_OFFSET_BITS));
++}
++
++void *bkey_offset_to_nvmpg_ptr(unsigned long bkey_offset)
++{
++ unsigned long nvmpg_offset;
++
++ nvmpg_offset = bkey_offset_to_nvmpg_offset(bkey_offset);
++ return bch_nvmpg_offset_to_ptr(nvmpg_offset);
++}
++
+ static inline void reserve_nvmpg_pages(struct bch_nvmpg_ns *ns,
+ pgoff_t pgoff, u64 nr)
+ {
+diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
+index a8a36a6caebb..c187185d0c3f 100644
+--- a/drivers/md/bcache/nvmpg.h
++++ b/drivers/md/bcache/nvmpg.h
+@@ -84,8 +84,16 @@ struct bch_nvmpg_set {
+ (BCH_NVMPG_START - BCH_NVMPG_SYSRECS_OFFSET) / \
+ sizeof(struct bch_nvmpg_recs)))
+
+-void *bch_nvmpg_offset_to_ptr(unsigned long offset);
+-unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
++/* For bkey PTR_OFFSET to nvmpg namespace ID and offset conversion.
++ *
++ * PTR_OFFSET is 43 bits, the most significant 3 bits are for
++ * namespace ID. The remaining 40 bits are for per-namespace offset
++ * in sectors.
++ */
++#define BCH_BKEY_OFFSET_NS_ID_BITS 3
++#define BCH_BKEY_OFFSET_NS_ID_MASK ((1UL<<BCH_BKEY_OFFSET_NS_ID_BITS) - 1)
++#define BCH_BKEY_OFFSET_BITS 40
++#define BCH_BKEY_OFFSET_MASK ((1UL<<BCH_BKEY_OFFSET_BITS) - 1)
+
+ #if defined(CONFIG_BCACHE_NVM_PAGES)
+
+@@ -97,6 +105,12 @@ void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uui
+ void bch_nvmpg_flush(void);
+ struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid);
+ struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id);
++unsigned long bkey_offset_to_nvmpg_offset(unsigned long bkey_offset);
++unsigned long nvmpg_offset_to_bkey_offset(unsigned long nvmpg_offset);
++void *bkey_offset_to_nvmpg_ptr(unsigned long bkey_offset);
++void *bch_nvmpg_offset_to_ptr(unsigned long offset);
++unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
++
+
+ #else
+
+@@ -120,11 +134,43 @@ static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+ static inline void bch_nvmpg_free_pages(unsigned long nvmpg_offset, int order, const char *uuid) { }
+ static inline void bch_nvmpg_flush(void) {}
+
++static inline struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id)
++{
++ return NULL;
++}
++
+ static inline struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid)
+ {
+ return NULL;
+ }
+
++static inline unsigned long bkey_offset_to_nvmpg_offset(unsigned long bkey_offset)
++{
++ return 0;
++}
++
++static inline unsigned long nvmpg_offset_to_bkey_offset(unsigned long nvmpg_offset)
++{
++ return 0;
++}
++
++/* XXX: should not return NULL when NVDIMM support is not enabled */
++static inline void *bkey_offset_to_nvmpg_ptr(unsigned long bkey_offset)
++{
++ return NULL;
++}
++
++static inline void *bch_nvmpg_offset_to_ptr(unsigned long offset)
++{
++ return NULL;
++}
++
++static inline unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr)
++{
++ return 0;
++}
++
++
+ #endif /* CONFIG_BCACHE_NVM_PAGES */
+
+ #endif /* _BCACHE_NVM_PAGES_H */
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0015-bcache-add-KEY_NVMPG-bit-in-KEY_FIELD.patch b/for-test/nvdimm-support/meta-dev-20230303/0015-bcache-add-KEY_NVMPG-bit-in-KEY_FIELD.patch
new file mode 100644
index 0000000..c94600f
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0015-bcache-add-KEY_NVMPG-bit-in-KEY_FIELD.patch
@@ -0,0 +1,30 @@
+From 8ddd7e832cb0f838cbab5f153030f48190f8ec72 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Mon, 4 Jul 2022 21:21:02 +0800
+Subject: [PATCH 15/16] bcache: add KEY_NVMPG bit in KEY_FIELD
+
+This patch adds the KEY_NVMPG bit in KEY_FIELD. When the KEY_NVMPG bit is
+set, PTR_OFFSET of the key points to an NVDIMM area. It will be used in
+later patches to check whether a btree node is allocated from NVDIMM pages.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/bcache_ondisk.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
+index 97413586195b..6c890f632197 100644
+--- a/drivers/md/bcache/bcache_ondisk.h
++++ b/drivers/md/bcache/bcache_ondisk.h
+@@ -45,7 +45,7 @@ static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \
+ KEY_FIELD(KEY_PTRS, high, 60, 3)
+ KEY_FIELD(__PAD0, high, 58, 2)
+ KEY_FIELD(KEY_CSUM, high, 56, 2)
+-KEY_FIELD(__PAD1, high, 55, 1)
++KEY_FIELD(KEY_NVMPG, high, 55, 1)
+ KEY_FIELD(KEY_DIRTY, high, 36, 1)
+
+ KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS)
+--
+2.39.2
+
diff --git a/for-test/nvdimm-support/meta-dev-20230303/0016-bcache-support-storing-bcache-btree-nodes-into-NVDIMM-meta-device.patch b/for-test/nvdimm-support/meta-dev-20230303/0016-bcache-support-storing-bcache-btree-nodes-into-NVDIMM-meta-device.patch
new file mode 100644
index 0000000..81d1def
--- /dev/null
+++ b/for-test/nvdimm-support/meta-dev-20230303/0016-bcache-support-storing-bcache-btree-nodes-into-NVDIMM-meta-device.patch
@@ -0,0 +1,560 @@
+From b3a2634cd2ac86de5e7ac607104db6866b1b9f6b Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Tue, 5 Jul 2022 23:24:18 +0800
+Subject: [PATCH 16/16] bcache: support storing bcache btree nodes into NVDIMM
+ meta device
+
+WIP.
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/alloc.c | 45 ++++++++++
+ drivers/md/bcache/bcache.h | 1 +
+ drivers/md/bcache/btree.c | 163 +++++++++++++++++++++++++++++++-----
+ drivers/md/bcache/extents.c | 55 ++++++++----
+ drivers/md/bcache/journal.c | 3 +
+ drivers/md/bcache/request.c | 13 ++-
+ 6 files changed, 241 insertions(+), 39 deletions(-)
+
+diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
+index ce13c272c387..54ccf83b7261 100644
+--- a/drivers/md/bcache/alloc.c
++++ b/drivers/md/bcache/alloc.c
+@@ -63,6 +63,7 @@
+
+ #include "bcache.h"
+ #include "btree.h"
++#include "nvmpg.h"
+
+ #include <linux/blkdev.h>
+ #include <linux/kthread.h>
+@@ -477,14 +478,58 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
+ }
+ }
+
++static void __bch_nvmpg_bucket_free(struct cache_set *c, struct bkey *k)
++{
++ int order;
++ unsigned long nvmpg_offset;
++
++ order = ilog2(c->cache->sb.bucket_size / PAGE_SECTORS);
++ nvmpg_offset = bkey_offset_to_nvmpg_offset(PTR_OFFSET(k, 0));
++ memset(bch_nvmpg_offset_to_ptr(nvmpg_offset), 0, 1<<order);
++ bch_nvmpg_free_pages(nvmpg_offset, order, c->set_uuid);
++}
++
+ void bch_bucket_free(struct cache_set *c, struct bkey *k)
+ {
+ unsigned int i;
+
++ if (KEY_NVMPG(k)) {
++ __bch_nvmpg_bucket_free(c, k);
++ return;
++ }
++
+ for (i = 0; i < KEY_PTRS(k); i++)
+ __bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
+ }
+
++int __bch_nvmpg_bucket_alloc(struct cache_set *c, struct bkey *k)
++{
++ struct cache *ca;
++ unsigned long nvmpg_offset, bkey_offset;
++ int order;
++
++ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
++ return -1;
++
++ lockdep_assert_held(&c->bucket_lock);
++
++ ca = c->cache;
++ order = ilog2(ca->sb.bucket_size / PAGE_SECTORS);
++ nvmpg_offset = bch_nvmpg_alloc_pages(order, c->set_uuid);
++ if (!nvmpg_offset)
++ goto err;
++
++ bkey_offset = nvmpg_offset_to_bkey_offset(nvmpg_offset);
++ bkey_init(k);
++ SET_KEY_NVMPG(k, true);
++ k->ptr[0] = MAKE_PTR(0, bkey_offset, ca->sb.nr_this_dev);
++ SET_KEY_PTRS(k, 1);
++
++ return 0;
++err:
++ return -1;
++}
++
+ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, bool wait)
+ {
+diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
+index 2acda9cea0f9..395c923d68cf 100644
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -991,6 +991,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
+ unsigned int sectors, unsigned int write_point,
+ unsigned int write_prio, bool wait);
+ bool bch_cached_dev_error(struct cached_dev *dc);
++int __bch_nvmpg_bucket_alloc(struct cache_set *c, struct bkey *k);
+
+ __printf(2, 3)
+ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 6a90c33109c7..022a227f0c02 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -25,6 +25,8 @@
+ #include "btree.h"
+ #include "debug.h"
+ #include "extents.h"
++#include "features.h"
++#include "nvmpg.h"
+
+ #include <linux/slab.h>
+ #include <linux/bitops.h>
+@@ -129,6 +131,9 @@ void bkey_put(struct cache_set *c, struct bkey *k)
+ {
+ unsigned int i;
+
++ if (KEY_NVMPG(k))
++ return;
++
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(c, k, i))
+ atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
+@@ -170,6 +175,10 @@ void bch_btree_node_read_done(struct btree *b)
+ for (;
+ b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq;
+ i = write_block(b)) {
++ err = "bad magic";
++ if (i->magic != bset_magic(&b->c->cache->sb))
++ goto err;
++
+ err = "unsupported bset version";
+ if (i->version > BCACHE_BSET_VERSION)
+ goto err;
+@@ -179,10 +188,6 @@ void bch_btree_node_read_done(struct btree *b)
+ btree_blocks(b))
+ goto err;
+
+- err = "bad magic";
+- if (i->magic != bset_magic(&b->c->cache->sb))
+- goto err;
+-
+ err = "bad checksum";
+ switch (i->version) {
+ case 0:
+@@ -227,9 +232,15 @@ void bch_btree_node_read_done(struct btree *b)
+ return;
+ err:
+ set_btree_node_io_error(b);
+- bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys",
+- err, PTR_BUCKET_NR(b->c, &b->key, 0),
+- bset_block_offset(b, i), i->keys);
++ if (!KEY_NVMPG(&b->key))
++ bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys",
++ err, PTR_BUCKET_NR(b->c, &b->key, 0),
++ bset_block_offset(b, i), i->keys);
++ else
++ bch_cache_set_error(b->c, "%s at addr %p, block %u, %u keys",
++ err, bkey_offset_to_nvmpg_ptr(PTR_OFFSET(&b->key, 0)),
++ bset_block_offset(b, i), i->keys);
++
+ goto out;
+ }
+
+@@ -240,7 +251,7 @@ static void btree_node_read_endio(struct bio *bio)
+ closure_put(cl);
+ }
+
+-static void bch_btree_node_read(struct btree *b)
++static void __bch_btree_node_read(struct btree *b)
+ {
+ uint64_t start_time = local_clock();
+ struct closure cl;
+@@ -278,6 +289,28 @@ static void bch_btree_node_read(struct btree *b)
+ PTR_BUCKET_NR(b->c, &b->key, 0));
+ }
+
++static void __bch_nvmpg_btree_node_read(struct btree *b)
++{
++ uint64_t start_time = local_clock();
++ void *ptr;
++
++ ptr = bkey_offset_to_nvmpg_ptr(PTR_OFFSET(&b->key, 0));
++ memcpy(b->keys.set[0].data, ptr, KEY_SIZE(&b->key) << 9);
++
++ bch_btree_node_read_done(b);
++ bch_time_stats_update(&b->c->btree_read_time, start_time);
++}
++
++static void bch_btree_node_read(struct btree *b)
++{
++ trace_bcache_btree_read(b);
++
++ if (!KEY_NVMPG(&b->key))
++ __bch_btree_node_read(b);
++ else
++ __bch_nvmpg_btree_node_read(b);
++}
++
+ static void btree_complete_write(struct btree *b, struct btree_write *w)
+ {
+ if (w->prio_blocked &&
+@@ -335,7 +368,7 @@ static void btree_node_write_endio(struct bio *bio)
+ closure_put(cl);
+ }
+
+-static void do_btree_node_write(struct btree *b)
++static void __do_btree_node_write(struct btree *b)
+ {
+ struct closure *cl = &b->io;
+ struct bset *i = btree_bset_last(b);
+@@ -400,6 +433,68 @@ static void do_btree_node_write(struct btree *b)
+ }
+ }
+
++static void btree_nvmpg_complete_write(struct btree *b, struct btree_write *w)
++{
++ atomic_sub(w->prio_blocked, &b->c->prio_blocked);
++
++ if (w->journal) {
++ atomic_dec_bug(w->journal);
++ __closure_wake_up(&b->c->journal.wait);
++ }
++
++ w->prio_blocked = 0;
++ w->journal = NULL;
++}
++
++static void btree_nvmpg_node_write_done(struct closure *cl)
++{
++ struct btree *b = container_of(cl, struct btree, io);
++ struct btree_write *w = btree_prev_write(b);
++
++ btree_nvmpg_complete_write(b, w);
++
++ if (btree_node_dirty(b))
++ queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
++
++ closure_return_with_destructor(cl, btree_node_write_unlock);
++}
++
++static void __do_nvmpg_btree_node_write(struct btree *b)
++{
++ struct closure *cl = &b->io;
++ struct bset *i = btree_bset_last(b);
++ unsigned long nvmpg_offset;
++ void *nvmpg_ptr;
++
++ i->version = BCACHE_BSET_VERSION;
++ i->csum = btree_csum_set(b, i);
++
++ BUG_ON(b->bio);
++
++ /* Calculate location to write */
++ nvmpg_offset = bkey_offset_to_nvmpg_offset(PTR_OFFSET(&b->key, 0));
++ nvmpg_ptr = bch_nvmpg_offset_to_ptr(nvmpg_offset) +
++ bset_byte_offset(&b->keys, i);
++
++ if (b->level > 0)
++ memcpy_flushcache(nvmpg_ptr, i,
++ roundup(set_bytes(i), block_bytes(b->c->cache)));
++ else
++ memcpy(nvmpg_ptr, i,
++ roundup(set_bytes(i), block_bytes(b->c->cache)));
++
++ closure_sync(cl);
++ continue_at_nobarrier(cl, btree_nvmpg_node_write_done, NULL);
++}
++
++static void do_btree_node_write(struct btree *b)
++{
++ if (!KEY_NVMPG(&b->key))
++ __do_btree_node_write(b);
++ else
++ __do_nvmpg_btree_node_write(b);
++}
++
+ void __bch_btree_node_write(struct btree *b, struct closure *parent)
+ {
+ struct bset *i = btree_bset_last(b);
+@@ -535,6 +630,9 @@ static void mca_bucket_free(struct btree *b)
+ {
+ BUG_ON(btree_node_dirty(b));
+
++ if (KEY_NVMPG(&b->key))
++ SET_KEY_NVMPG(&b->key, false);
++
+ b->key.ptr[0] = 0;
+ hlist_del_init_rcu(&b->hash);
+ list_move(&b->list, &b->c->btree_cache_freeable);
+@@ -1091,13 +1189,25 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ {
+ BKEY_PADDED(key) k;
+ struct btree *b = ERR_PTR(-EAGAIN);
++ int err = -1;
+
+ mutex_lock(&c->bucket_lock);
+ retry:
+- if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait))
+- goto err;
++ /*
++ * If nvdimm_meta feature is enabled, try to allocate btree
++ * node from NVDIMM pages and set KEY_NVMPG bit successfully.
++ */
++ if (bch_has_feature_nvdimm_meta(&(c->cache->sb)))
++ err = __bch_nvmpg_bucket_alloc(c, &k.key);
++
++ if (err < 0) {
++ err = __bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait);
++ if (!err)
++ bkey_put(c, &k.key);
++ else
++ goto err;
++ }
+
+- bkey_put(c, &k.key);
+ SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
+
+ b = mca_alloc(c, op, &k.key, level);
+@@ -1159,10 +1269,12 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
+ bkey_copy(k, &b->key);
+ bkey_copy_key(k, &ZERO_KEY);
+
+- for (i = 0; i < KEY_PTRS(k); i++)
+- SET_PTR_GEN(k, i,
+- bch_inc_gen(b->c->cache,
+- PTR_BUCKET(b->c, &b->key, i)));
++ if (!KEY_NVMPG(&b->key)) {
++ for (i = 0; i < KEY_PTRS(k); i++)
++ SET_PTR_GEN(k, i,
++ bch_inc_gen(b->c->cache,
++ PTR_BUCKET(b->c, &b->key, i)));
++ }
+
+ mutex_unlock(&b->c->bucket_lock);
+ }
+@@ -1205,6 +1317,9 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
+ if (!bkey_cmp(k, &ZERO_KEY))
+ return stale;
+
++ if (KEY_NVMPG(k))
++ return stale;
++
+ for (i = 0; i < KEY_PTRS(k); i++) {
+ if (!ptr_available(c, k, i))
+ continue;
+@@ -1248,6 +1363,9 @@ void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
+ {
+ unsigned int i;
+
++ if (KEY_NVMPG(k))
++ return;
++
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(c, k, i) &&
+ !ptr_stale(c, k, i)) {
+@@ -1748,10 +1866,14 @@ static void bch_btree_gc_finish(struct cache_set *c)
+
+ spin_lock(&dc->writeback_keys.lock);
+ rbtree_postorder_for_each_entry_safe(w, n,
+- &dc->writeback_keys.keys, node)
++ &dc->writeback_keys.keys, node) {
++ if (KEY_NVMPG(&w->key))
++ continue;
++
+ for (j = 0; j < KEY_PTRS(&w->key); j++)
+ SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
+ GC_MARK_DIRTY);
++ }
+ spin_unlock(&dc->writeback_keys.lock);
+ }
+ rcu_read_unlock();
+@@ -2480,8 +2602,11 @@ void bch_btree_set_root(struct btree *b)
+
+ BUG_ON(!b->written);
+
+- for (i = 0; i < KEY_PTRS(&b->key); i++)
+- BUG_ON(PTR_BUCKET(b->c, &b->key, i)->prio != BTREE_PRIO);
++ if (!KEY_NVMPG(&b->key)) {
++ for (i = 0; i < KEY_PTRS(&b->key); i++)
++ BUG_ON(PTR_BUCKET(b->c, &b->key, i)->prio !=
++ BTREE_PRIO);
++ }
+
+ mutex_lock(&b->c->bucket_lock);
+ list_del_init(&b->list);
+diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
+index d626ffcbecb9..4b11d857f091 100644
+--- a/drivers/md/bcache/extents.c
++++ b/drivers/md/bcache/extents.c
+@@ -51,13 +51,18 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(c, k, i)) {
+ struct cache *ca = c->cache;
+- size_t bucket = PTR_BUCKET_NR(c, k, i);
+ size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+- if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
+- bucket < ca->sb.first_bucket ||
+- bucket >= ca->sb.nbuckets)
++ if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
+ return true;
++
++ if (!KEY_NVMPG(k)) {
++ size_t bucket = PTR_BUCKET_NR(c, k, i);
++
++ if (bucket < ca->sb.first_bucket ||
++ bucket >= ca->sb.nbuckets)
++ return true;
++ }
+ }
+
+ return false;
+@@ -72,17 +77,20 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(c, k, i)) {
+ struct cache *ca = c->cache;
+- size_t bucket = PTR_BUCKET_NR(c, k, i);
+ size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+ if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
+ return "bad, length too big";
+- if (bucket < ca->sb.first_bucket)
+- return "bad, short offset";
+- if (bucket >= ca->sb.nbuckets)
+- return "bad, offset past end of device";
+- if (ptr_stale(c, k, i))
+- return "stale";
++ if (!KEY_NVMPG(k)) {
++ size_t bucket = PTR_BUCKET_NR(c, k, i);
++
++ if (bucket < ca->sb.first_bucket)
++ return "bad, short offset";
++ if (bucket >= ca->sb.nbuckets)
++ return "bad, offset past end of device";
++ if (ptr_stale(c, k, i))
++ return "stale";
++ }
+ }
+
+ if (!bkey_cmp(k, &ZERO_KEY))
+@@ -129,6 +137,9 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
+ unsigned int j;
+ char buf[80];
+
++ if (KEY_NVMPG(k))
++ return;
++
+ bch_extent_to_text(buf, sizeof(buf), k);
+ pr_cont(" %s", buf);
+
+@@ -176,6 +187,9 @@ static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
+ char buf[80];
+ struct bucket *g;
+
++ if (KEY_NVMPG(k))
++ return false;
++
+ if (mutex_trylock(&b->c->bucket_lock)) {
+ for (i = 0; i < KEY_PTRS(k); i++)
+ if (ptr_available(b->c, k, i)) {
+@@ -212,10 +226,12 @@ static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
+ bch_ptr_invalid(bk, k))
+ return true;
+
+- for (i = 0; i < KEY_PTRS(k); i++)
+- if (!ptr_available(b->c, k, i) ||
+- ptr_stale(b->c, k, i))
++ for (i = 0; i < KEY_PTRS(k); i++) {
++ if (!ptr_available(b->c, k, i))
++ return true;
++ if (!KEY_NVMPG(k) && ptr_stale(b->c, k, i))
+ return true;
++ }
+
+ if (expensive_debug_checks(b->c) &&
+ btree_ptr_bad_expensive(b, k))
+@@ -507,9 +523,13 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
+ unsigned int ptr)
+ {
+- struct bucket *g = PTR_BUCKET(b->c, k, ptr);
++ struct bucket *g;
+ char buf[80];
+
++ if (KEY_NVMPG(k))
++ return false;
++
++ g = PTR_BUCKET(b->c, k, ptr);
+ if (mutex_trylock(&b->c->bucket_lock)) {
+ if (b->c->gc_mark_valid &&
+ (!GC_MARK(g) ||
+@@ -548,7 +568,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
+ if (!ptr_available(b->c, k, i))
+ return true;
+
+- for (i = 0; i < KEY_PTRS(k); i++) {
++ for (i = 0; (!KEY_NVMPG(k)) && (i < KEY_PTRS(k)); i++) {
+ stale = ptr_stale(b->c, k, i);
+
+ if (stale && KEY_DIRTY(k)) {
+@@ -588,6 +608,9 @@ static bool bch_extent_merge(struct btree_keys *bk,
+ if (key_merging_disabled(b->c))
+ return false;
+
++ if (KEY_NVMPG(l) || KEY_NVMPG(r))
++ return false;
++
+ for (i = 0; i < KEY_PTRS(l); i++)
+ if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+ PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 24615df1f4e6..85a20e081f12 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -382,6 +382,9 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
+ if (!__bch_extent_invalid(c, k)) {
+ unsigned int j;
+
++ if (KEY_NVMPG(k))
++ continue;
++
+ for (j = 0; j < KEY_PTRS(k); j++)
+ if (ptr_available(c, k, j))
+ atomic_inc(&PTR_BUCKET(c, k, j)->pin);
+diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
+index f2c5a7e06fa9..4a5d75e8a2dd 100644
+--- a/drivers/md/bcache/request.c
++++ b/drivers/md/bcache/request.c
+@@ -232,9 +232,11 @@ static void bch_data_insert_start(struct closure *cl)
+ if (op->writeback) {
+ SET_KEY_DIRTY(k, true);
+
+- for (i = 0; i < KEY_PTRS(k); i++)
+- SET_GC_MARK(PTR_BUCKET(op->c, k, i),
+- GC_MARK_DIRTY);
++ if (!KEY_NVMPG(k)) {
++ for (i = 0; i < KEY_PTRS(k); i++)
++ SET_GC_MARK(PTR_BUCKET(op->c, k, i),
++ GC_MARK_DIRTY);
++ }
+ }
+
+ SET_KEY_CSUM(k, op->csum);
+@@ -542,7 +544,10 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
+ /* XXX: figure out best pointer - for multiple cache devices */
+ ptr = 0;
+
+- PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;
++ if (!KEY_NVMPG(k))
++ PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;
++ else
++ pr_err("nvmpg key should not show up here.\n");
+
+ if (KEY_DIRTY(k))
+ s->read_dirty_data = true;
+--
+2.39.2
+