diff options
author | Coly Li <colyli@suse.de> | 2019-05-21 22:40:29 +0800 |
---|---|---|
committer | Coly Li <colyli@suse.de> | 2019-05-21 22:40:29 +0800 |
commit | 13430b80827436604f5f649aa9f4833f5fa03892 (patch) | |
tree | b3d1acdbbb95c4e8b547ba8a006eafb90255b668 | |
parent | 03d4e899a667696d44b08d3564e1fa57d181fab2 (diff) | |
download | bcache-patches-13430b80827436604f5f649aa9f4833f5fa03892.tar.gz |
for-next: add patches for 5.3
for-test: move some patches to for-next, remove some useless patches.
15 files changed, 278 insertions, 974 deletions
diff --git a/for-test/0001-bcache-ignore-read-ahead-request-failure-on-backing-.patch b/for-next/0001-bcache-ignore-read-ahead-request-failure-on-backing-.patch index 1338418..008f591 100644 --- a/for-test/0001-bcache-ignore-read-ahead-request-failure-on-backing-.patch +++ b/for-next/0001-bcache-ignore-read-ahead-request-failure-on-backing-.patch @@ -1,7 +1,7 @@ From 31dc685d78b6f77ddd3d4ffa97478431a6602ed9 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Mon, 13 May 2019 22:48:09 +0800 -Subject: [PATCH 1/5] bcache: ignore read-ahead request failure on backing +Subject: [PATCH 1/9] bcache: ignore read-ahead request failure on backing device When md raid device (e.g. raid456) is used as backing device, read-ahead diff --git a/for-test/0002-bcache-add-io-error-counting-in-write_bdev_super_end.patch b/for-next/0002-bcache-add-io-error-counting-in-write_bdev_super_end.patch index 21e2ad5..234e935 100644 --- a/for-test/0002-bcache-add-io-error-counting-in-write_bdev_super_end.patch +++ b/for-next/0002-bcache-add-io-error-counting-in-write_bdev_super_end.patch @@ -1,7 +1,7 @@ From 1ccada2ebb2f37fbe2b0a3705a3166e4f3f8d2fb Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Mon, 13 May 2019 23:42:39 +0800 -Subject: [PATCH 2/5] bcache: add io error counting in write_bdev_super_endio() +Subject: [PATCH 2/9] bcache: add io error counting in write_bdev_super_endio() When backing device super block is written by bch_write_bdev_super(), the bio complete callback write_bdev_super_endio() simply ignores I/O diff --git a/for-test/0003-bcache-remove-XXX-comment-line-from-run_cache_set.patch b/for-next/0003-bcache-remove-XXX-comment-line-from-run_cache_set.patch index 9243605..d71e796 100644 --- a/for-test/0003-bcache-remove-XXX-comment-line-from-run_cache_set.patch +++ b/for-next/0003-bcache-remove-XXX-comment-line-from-run_cache_set.patch @@ -1,7 +1,7 @@ From 5e92305f8838785b2c42ed2cb8c5f2bc03103e94 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> 
Date: Mon, 13 May 2019 23:47:38 +0800 -Subject: [PATCH 3/5] bcache: remove "XXX:" comment line from run_cache_set() +Subject: [PATCH 3/9] bcache: remove "XXX:" comment line from run_cache_set() In previous bcache patches for Linux v5.2, the failure code path of run_cache_set() is tested and fixed. So now the following comment diff --git a/for-test/0004-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch b/for-next/0004-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch index d8f996a..a06fd72 100644 --- a/for-test/0004-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch +++ b/for-next/0004-bcache-remove-unnecessary-prefetch-in-bset_search_tr.patch @@ -1,7 +1,7 @@ From 77980a54c7e90525e8cada5b75bc44daa214d9e5 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Tue, 14 May 2019 22:23:35 +0800 -Subject: [PATCH 4/5] bcache: remove unnecessary prefetch() in +Subject: [PATCH 4/9] bcache: remove unnecessary prefetch() in bset_search_tree() In function bset_search_tree(), when p >= t->size, t->tree[0] will be diff --git a/for-test/0005-bcache-make-bset_search_tree-be-more-understandable.patch b/for-next/0005-bcache-make-bset_search_tree-be-more-understandable.patch index 1ed7fae..856fea9 100644 --- a/for-test/0005-bcache-make-bset_search_tree-be-more-understandable.patch +++ b/for-next/0005-bcache-make-bset_search_tree-be-more-understandable.patch @@ -1,7 +1,7 @@ From 5e31e419f54eb8db7f4e95bf9328523e801c1dfb Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Tue, 14 May 2019 22:51:40 +0800 -Subject: [PATCH 5/5] bcache: make bset_search_tree() be more understandable +Subject: [PATCH 5/9] bcache: make bset_search_tree() be more understandable The purpose of following code in bset_search_tree() is to avoid a branch instruction, diff --git a/for-next/0006-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch b/for-next/0006-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch new file mode 100644 index 0000000..0f81d43 --- 
/dev/null +++ b/for-next/0006-bcache-use-sysfs_match_string-instead-of-__sysfs_mat.patch @@ -0,0 +1,97 @@ +From a6bdbacfd6a798edcceaaae5fe48fd2403a4c93a Mon Sep 17 00:00:00 2001 +From: Alexandru Ardelean <alexandru.ardelean@analog.com> +Date: Tue, 7 May 2019 12:43:12 +0300 +Subject: [PATCH 6/9] bcache: use sysfs_match_string() instead of + __sysfs_match_string() + +The arrays (of strings) that are passed to __sysfs_match_string() are +static, so use sysfs_match_string() which does an implicit ARRAY_SIZE() +over these arrays. + +Functionally, this doesn't change anything. +The change is more cosmetic. + +It only shrinks the static arrays by 1 byte each. + +Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com> +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/sysfs.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 6cd44d3cf906..3a520262933d 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -21,28 +21,24 @@ static const char * const bch_cache_modes[] = { + "writethrough", + "writeback", + "writearound", +- "none", +- NULL ++ "none" + }; + + /* Default is 0 ("auto") */ + static const char * const bch_stop_on_failure_modes[] = { + "auto", +- "always", +- NULL ++ "always" + }; + + static const char * const cache_replacement_policies[] = { + "lru", + "fifo", +- "random", +- NULL ++ "random" + }; + + static const char * const error_actions[] = { + "unregister", +- "panic", +- NULL ++ "panic" + }; + + write_attribute(attach); +@@ -333,7 +329,7 @@ STORE(__cached_dev) + bch_cached_dev_run(dc); + + if (attr == &sysfs_cache_mode) { +- v = __sysfs_match_string(bch_cache_modes, -1, buf); ++ v = sysfs_match_string(bch_cache_modes, buf); + if (v < 0) + return v; + +@@ -344,7 +340,7 @@ STORE(__cached_dev) + } + + if (attr == &sysfs_stop_when_cache_set_failed) { +- v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); 
++ v = sysfs_match_string(bch_stop_on_failure_modes, buf); + if (v < 0) + return v; + +@@ -794,7 +790,7 @@ STORE(__bch_cache_set) + 0, UINT_MAX); + + if (attr == &sysfs_errors) { +- v = __sysfs_match_string(error_actions, -1, buf); ++ v = sysfs_match_string(error_actions, buf); + if (v < 0) + return v; + +@@ -1058,7 +1054,7 @@ STORE(__bch_cache) + } + + if (attr == &sysfs_cache_replacement_policy) { +- v = __sysfs_match_string(cache_replacement_policies, -1, buf); ++ v = sysfs_match_string(cache_replacement_policies, buf); + if (v < 0) + return v; + +-- +2.16.4 + diff --git a/for-next/0007-bcache-add-return-value-check-to-bch_cached_dev_run.patch b/for-next/0007-bcache-add-return-value-check-to-bch_cached_dev_run.patch new file mode 100644 index 0000000..177854b --- /dev/null +++ b/for-next/0007-bcache-add-return-value-check-to-bch_cached_dev_run.patch @@ -0,0 +1,151 @@ +From 8e29cf9a5dc6f5c7e06f7c9bc516a1e03a998f85 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Tue, 21 May 2019 22:16:38 +0800 +Subject: [PATCH 7/9] bcache: add return value check to bch_cached_dev_run() + +This patch adds return value check to bch_cached_dev_run(), now if there +is error happens inside bch_cached_dev_run(), it can be catched. 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/bcache.h | 2 +- + drivers/md/bcache/super.c | 32 +++++++++++++++++++++++++------- + drivers/md/bcache/sysfs.c | 7 +++++-- + 3 files changed, 31 insertions(+), 10 deletions(-) + +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index fdf75352e16a..73a97586a2ef 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -1006,7 +1006,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size); + int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + uint8_t *set_uuid); + void bch_cached_dev_detach(struct cached_dev *dc); +-void bch_cached_dev_run(struct cached_dev *dc); ++int bch_cached_dev_run(struct cached_dev *dc); + void bcache_device_stop(struct bcache_device *d); + + void bch_cache_set_unregister(struct cache_set *c); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 9d9f852852c6..34653cdcdd4a 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -910,7 +910,7 @@ static int cached_dev_status_update(void *arg) + } + + +-void bch_cached_dev_run(struct cached_dev *dc) ++int bch_cached_dev_run(struct cached_dev *dc) + { + struct bcache_device *d = &dc->disk; + char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL); +@@ -921,11 +921,14 @@ void bch_cached_dev_run(struct cached_dev *dc) + NULL, + }; + ++ if (dc->io_disable) ++ return -EIO; ++ + if (atomic_xchg(&dc->running, 1)) { + kfree(env[1]); + kfree(env[2]); + kfree(buf); +- return; ++ return -EBUSY; + } + + if (!d->c && +@@ -951,8 +954,10 @@ void bch_cached_dev_run(struct cached_dev *dc) + kfree(buf); + + if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || +- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) ++ sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) { + pr_debug("error creating sysfs link"); ++ return -ENOMEM; ++ } + + dc->status_update_thread = 
kthread_run(cached_dev_status_update, + dc, "bcache_status_update"); +@@ -961,6 +966,8 @@ void bch_cached_dev_run(struct cached_dev *dc) + "continue to run without monitoring backing " + "device status"); + } ++ ++ return 0; + } + + /* +@@ -1056,6 +1063,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds()); + struct uuid_entry *u; + struct cached_dev *exist_dc, *t; ++ int ret = 0; + + if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || + (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) +@@ -1165,7 +1173,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, + + bch_sectors_dirty_init(&dc->disk); + +- bch_cached_dev_run(dc); ++ ret = bch_cached_dev_run(dc); ++ if (ret) { ++ up_write(&dc->writeback_lock); ++ return ret; ++ } ++ + bcache_device_link(&dc->disk, c, "bdev"); + atomic_inc(&c->attached_dev_nr); + +@@ -1292,6 +1305,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page, + { + const char *err = "cannot allocate memory"; + struct cache_set *c; ++ int ret = -ENOMEM; + + bdevname(bdev, dc->backing_dev_name); + memcpy(&dc->sb, sb, sizeof(struct cache_sb)); +@@ -1321,14 +1335,18 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page, + bch_cached_dev_attach(dc, c, NULL); + + if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || +- BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) +- bch_cached_dev_run(dc); ++ BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { ++ err = "failed to run cached device"; ++ ret = bch_cached_dev_run(dc); ++ if (ret) ++ goto err; ++ } + + return 0; + err: + pr_notice("error %s: %s", dc->backing_dev_name, err); + bcache_device_stop(&dc->disk); +- return -EIO; ++ return ret; + } + + /* Flash only volumes */ +diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c +index 3a520262933d..129031663cc8 100644 +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -325,8 +325,11 @@ 
STORE(__cached_dev) + bch_cache_accounting_clear(&dc->accounting); + + if (attr == &sysfs_running && +- strtoul_or_return(buf)) +- bch_cached_dev_run(dc); ++ strtoul_or_return(buf)) { ++ v = bch_cached_dev_run(dc); ++ if (v) ++ return v; ++ } + + if (attr == &sysfs_cache_mode) { + v = sysfs_match_string(bch_cache_modes, buf); +-- +2.16.4 + diff --git a/for-test/0001-bcache-wake-up-allocator-thread-before-waiting-for-a.patch b/for-next/0008-bcache-wake-up-allocator-thread-before-waiting-for-a.patch index b818c1a..494d81a 100644 --- a/for-test/0001-bcache-wake-up-allocator-thread-before-waiting-for-a.patch +++ b/for-next/0008-bcache-wake-up-allocator-thread-before-waiting-for-a.patch @@ -1,8 +1,8 @@ -From e8dbb32672a5f1fd6087c0763af159bf95bc2510 Mon Sep 17 00:00:00 2001 +From bc49a3f47899f6f24a9eb555cf94d5d72f724ed8 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Mon, 19 Mar 2018 23:39:32 +0800 -Subject: [PATCH] bcache: wake up allocator thread before waiting for available - buckets +Subject: [PATCH 8/9] bcache: wake up allocator thread before waiting for + available buckets Currently alloc.c:bch_bucket_alloc() wakes up allocator thread after it pops an available bucket from one of ca->free[] lists. 
And this is @@ -30,10 +30,10 @@ Signed-off-by: Coly Li <colyli@suse.de> 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c -index 458e1d38577d..06cabbc8eded 100644 +index f8986effcb50..c543f378abb4 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c -@@ -402,6 +402,14 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) +@@ -408,6 +408,14 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait) TASK_UNINTERRUPTIBLE); mutex_unlock(&ca->set->bucket_lock); @@ -48,7 +48,7 @@ index 458e1d38577d..06cabbc8eded 100644 schedule(); mutex_lock(&ca->set->bucket_lock); } while (!fifo_pop(&ca->free[RESERVE_NONE], r) && -@@ -409,9 +417,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) +@@ -415,9 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait) finish_wait(&ca->set->bucket_wait, &w); out: @@ -59,5 +59,5 @@ index 458e1d38577d..06cabbc8eded 100644 if (expensive_debug_checks(ca->set)) { -- -2.16.2 +2.16.4 diff --git a/for-test/0001-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch b/for-next/0009-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch index f7b921e..7cf14ad 100644 --- a/for-test/0001-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch +++ b/for-next/0009-bcache-remove-unncessary-code-in-bch_btree_keys_init.patch @@ -1,7 +1,7 @@ -From 7e3fd7a0aa93b4b024b62a4c56165c249c1061b4 Mon Sep 17 00:00:00 2001 +From b2dbb2ca3139d14e63cf70604ac057025e9488df Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> -Date: Thu, 12 Apr 2018 14:27:07 +0800 -Subject: [PATCH 1/2] bcache: remove unncessary code in bch_btree_keys_init() +Date: Tue, 21 May 2019 22:36:35 +0800 +Subject: [PATCH 9/9] bcache: remove unncessary code in bch_btree_keys_init() Function bch_btree_keys_init() initializes b->set[].size and b->set[].data to zero. 
As the code comments indicates, these code indeed @@ -15,15 +15,22 @@ bch_btree_keys_init() and mca_bucket_alloc() to explain why it's safe. Signed-off-by: Coly Li <colyli@suse.de> --- - drivers/md/bcache/bset.c | 13 ++++++------- + drivers/md/bcache/bset.c | 15 ++++++--------- drivers/md/bcache/btree.c | 4 ++++ - 2 files changed, 10 insertions(+), 7 deletions(-) + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c -index 579c696a5fe0..343f4e9428e0 100644 +index f752cc791f50..32e2e4d8fa6c 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c -@@ -352,15 +352,14 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, +@@ -347,22 +347,19 @@ EXPORT_SYMBOL(bch_btree_keys_alloc); + void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, + bool *expensive_debug_checks) + { +- unsigned int i; +- + b->ops = ops; + b->expensive_debug_checks = expensive_debug_checks; b->nsets = 0; b->last_set_unwritten = 0; @@ -46,10 +53,10 @@ index 579c696a5fe0..343f4e9428e0 100644 EXPORT_SYMBOL(bch_btree_keys_init); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c -index 17936b2dc7d6..344641e23415 100644 +index 773f5fdad25f..cf38a1b031fa 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c -@@ -600,6 +600,10 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) +@@ -613,6 +613,10 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) static struct btree *mca_bucket_alloc(struct cache_set *c, struct bkey *k, gfp_t gfp) { @@ -58,8 +65,8 @@ index 17936b2dc7d6..344641e23415 100644 + * see code comments in bch_btree_keys_init(). 
+ */ struct btree *b = kzalloc(sizeof(struct btree), gfp); + if (!b) - return NULL; -- -2.16.2 +2.16.4 diff --git a/for-test/0001-bcache-add-return-value-check-for-bch_cached_dev_run.patch b/for-test/0001-bcache-add-return-value-check-for-bch_cached_dev_run.patch deleted file mode 100644 index 00df0d6..0000000 --- a/for-test/0001-bcache-add-return-value-check-for-bch_cached_dev_run.patch +++ /dev/null @@ -1,113 +0,0 @@ -From cba335c9e4966246bb74f705a876d3e88255abeb Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Tue, 28 Nov 2017 01:36:13 +0800 -Subject: [PATCH] bcache: add return value check for bch_cached_dev_run() - ---- - drivers/md/bcache/bcache.h | 2 +- - drivers/md/bcache/super.c | 22 +++++++++++++++++----- - drivers/md/bcache/sysfs.c | 7 +++++-- - 3 files changed, 23 insertions(+), 8 deletions(-) - -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 82e2bb39f8ed..fbf0bbb88e5b 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -911,7 +911,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size); - - int bch_cached_dev_attach(struct cached_dev *, struct cache_set *); - void bch_cached_dev_detach(struct cached_dev *); --void bch_cached_dev_run(struct cached_dev *); -+int bch_cached_dev_run(struct cached_dev *); - void bcache_device_stop(struct bcache_device *); - - void bch_cache_set_unregister(struct cache_set *); -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 27c0317f5b6b..a932302c33dc 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -862,7 +862,7 @@ static void calc_cached_dev_sectors(struct cache_set *c) - atomic64_set(&c->cached_dev_sectors,sectors); - } - --void bch_cached_dev_run(struct cached_dev *dc) -+int bch_cached_dev_run(struct cached_dev *dc) - { - struct bcache_device *d = &dc->disk; - char buf[SB_LABEL_SIZE + 1]; -@@ -873,6 +873,9 @@ void bch_cached_dev_run(struct cached_dev *dc) - NULL, - }; - -+ if 
(atomic_read(&dc->io_disabled)) -+ return -EIO; -+ - memcpy(buf, dc->sb.label, SB_LABEL_SIZE); - buf[SB_LABEL_SIZE] = '\0'; - env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); -@@ -880,7 +883,7 @@ void bch_cached_dev_run(struct cached_dev *dc) - if (atomic_xchg(&dc->running, 1)) { - kfree(env[1]); - kfree(env[2]); -- return; -+ return -EBUSY; - } - - if (!d->c && -@@ -904,6 +907,8 @@ void bch_cached_dev_run(struct cached_dev *dc) - if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || - sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) - pr_debug("error creating sysfs link"); -+ -+ return 0; - } - - static void cached_dev_detach_finish(struct work_struct *w) -@@ -1073,7 +1078,11 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) - bch_writeback_queue(dc); - } - -- bch_cached_dev_run(dc); -+ ret = bch_cached_dev_run(dc); -+ if (ret) { -+ up_write(&dc->writeback_lock); -+ goto out; -+ } - bcache_device_link(&dc->disk, c, "bdev"); - - /* Allow the writeback thread to proceed */ -@@ -1217,8 +1226,11 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, - bch_cached_dev_attach(dc, c); - - if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE || -- BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) -- bch_cached_dev_run(dc); -+ BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) { -+ err = "failed to run cached device"; -+ if (bch_cached_dev_run(dc)) -+ goto err; -+ } - - return; - err: -diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c -index ddba4be1329a..78bca5aec0b2 100644 ---- a/drivers/md/bcache/sysfs.c -+++ b/drivers/md/bcache/sysfs.c -@@ -226,8 +226,11 @@ STORE(__cached_dev) - bch_cache_accounting_clear(&dc->accounting); - - if (attr == &sysfs_running && -- strtoul_or_return(buf)) -- bch_cached_dev_run(dc); -+ strtoul_or_return(buf)) { -+ ret = bch_cached_dev_run(dc); -+ if (ret) -+ return ret; -+ } - - if (attr == &sysfs_cache_mode) { - v = bch_read_string_list(buf, bch_cache_modes + 1); --- 
-2.13.6 - diff --git a/for-test/0001-bcache-fix-a-circular-dead-locking-with-dc-writeback.patch b/for-test/0001-bcache-fix-a-circular-dead-locking-with-dc-writeback.patch deleted file mode 100644 index 721a2fa..0000000 --- a/for-test/0001-bcache-fix-a-circular-dead-locking-with-dc-writeback.patch +++ /dev/null @@ -1,198 +0,0 @@ -From: Coly Li <colyli@suse.de> -Date: Sat, 18 Nov 2017 00:42:59 +0800 -Subject: [RFC] bcache: fix a circular dead locking with dc->writeback_lock and bch_register_lock - -When bcache is in writeback mode, and with heavy write I/O, a warning by -lockdep check reports a potential circular locking issue, - -[ 58.084940] ====================================================== -[ 58.084941] WARNING: possible circular locking dependency detected -[ 58.084942] 4.14.0-1-default+ #3 Tainted: G W -[ 58.084943] ------------------------------------------------------ -[ 58.084944] kworker/0:3/1140 is trying to acquire lock: -[ 58.084945] (&bch_register_lock){+.+.}, at: [<ffffffffa069a29b>] update_writeback_rate+0x8b/0x290 [bcache] -[ 58.084958] - but task is already holding lock: -[ 58.084958] (&dc->writeback_lock){++++}, at: [<ffffffffa069a22f>] update_writeback_rate+0x1f/0x290 [bcache] -[ 58.084966] - which lock already depends on the new lock. 
- -[ 58.084966] - the existing dependency chain (in reverse order) is: -[ 58.084967] - -> #1 (&dc->writeback_lock){++++}: -[ 58.084972] down_write+0x51/0xb0 -[ 58.084978] bch_cached_dev_attach+0x239/0x500 [bcache] -[ 58.084983] run_cache_set+0x683/0x880 [bcache] -[ 58.084987] register_bcache+0xec7/0x1450 [bcache] -[ 58.084990] kernfs_fop_write+0x10e/0x1a0 -[ 58.084994] __vfs_write+0x23/0x150 -[ 58.084995] vfs_write+0xc2/0x1c0 -[ 58.084996] SyS_write+0x45/0xa0 -[ 58.084997] entry_SYSCALL_64_fastpath+0x23/0x9a -[ 58.084998] - -> #0 (&bch_register_lock){+.+.}: -[ 58.085002] lock_acquire+0xd4/0x220 -[ 58.085003] __mutex_lock+0x70/0x950 -[ 58.085009] update_writeback_rate+0x8b/0x290 [bcache] -[ 58.085011] process_one_work+0x1e5/0x5e0 -[ 58.085012] worker_thread+0x4a/0x3f0 -[ 58.085014] kthread+0x141/0x180 -[ 58.085015] ret_from_fork+0x24/0x30 -[ 58.085015] - other info that might help us debug this: - -[ 58.085015] Possible unsafe locking scenario: - -[ 58.085016] CPU0 CPU1 -[ 58.085016] ---- ---- -[ 58.085016] lock(&dc->writeback_lock); -[ 58.085017] lock(&bch_register_lock); -[ 58.085018] lock(&dc->writeback_lock); -[ 58.085019] lock(&bch_register_lock); -[ 58.085019] - *** DEADLOCK *** - -This is a real circular locking issue, it may hold dc->writeback_lock -for long time, block btree related operations, introduce long latency -for front end I/O requests on cache device. - -The code path of bch_cached_dev_attach() firstly aquires bch_register_lock -then acquires dc->writeback_lock. And code path of kworker function -update_writeback_rate() firstly acquires dc->writeback_lock then acquires -bch_register_lock. - -In kworker function update_writeback_rate(), mutex dc->writeback_lock is -acquired before calling __update_writeback_rate(). After read the code -carefully it seems holding dc->writeback_lock in update_writeback_rate() -is unncessary. Let me explain why. 
- -In __update_writeback_rate(), when bcache_flash_devs_sectors_dirty() is -called, mutex bch_register_lock is acquired to prevent bcache devices -changes (add/remove) from the cache set, which is necessary. But rested -global objects do not need writeback_lock protection. - -Let's see each global objects referenced in __update_writeback_rate(), -- The following 3 objects are only read and always same value. They don't - need to be protected by dc->writeback_lock. - dc->disk.c - c->nbuckets - c->sb.bucket_size -- The following objects are only changed and referenced inside non re- - entrancy function __update_writeback_rate(), then don't need to be - protected by dc->writeback_lock. - dc->writeback_rate_p_term_inverse - dc->writeback_rate_integral - dc->writeback_rate_update_seconds - dc->writeback_rate_i_term_inverse - dc->writeback_rate_minimum - dc->writeback_rate_proportional - dc->writeback_rate_integral_scaled - dc->writeback_rate_change - dc->writeback_rate_target -- dc->writeback_percent - Only changed via sysfs interface in runtime, and it is a 8bit variable, - it is safe to access without dc->writeback_lock. -- c->cached_dev_sectors - This is a 64bit variable, updated in calc_cached_dev_sectors() and - only read in __update_writeback_rate(). Change it into atomic64_t will - be safe enough on both 32bit and 64bit hardware. -- bcache_dev_sectors_dirty() - Inside this function, d->nr_stripes is a consistent number in run time, - stripe_sectors_dirty on each stripe is atomic_t, they are updated in - bcache_dev_sectors_dirty_add() and only read in function - bcache_dev_sectors_dirty(). It is safe to access these varaibles without - dc->writeback_lock. And if the bcache is removing from cache set, its - cached device's writebackrate update kworker should be canceled firstly, - so we don't need to worry about a NULL pointer dereference if bcache - device is removed when bcache_dev_sectors_dirty() is executing. 
-- dc->writeback_rate.next - writeback_rate.next is only read in __update_writeback_rate() and - updated in bch_next_delay(). bch_next_delay() is referenced by - writeback_delay()<-read_dirty()<-bch_writeback_thread(), while mutex - dc->writeback_lock is not held. That is to say, current bcache code - does not protect writeback_rate.next for concurrent access at all. For - 32bit hardware it might be problematic. This patch doesn't fix existing - concurrent access issue on 32bit hardware, but not make things worse - neither. -- dc->writeback_rate.rate - writeback_rate.rate is only read in bch_next_delay() and updated in - __update_writeback_rate(). Again its concurrent access is not protected - by dc->writeback_lock, it is 32bit and only modified in one thread, so - it is safe to use for now. - -From the above analysis, kworker function update_writeback_rate() can work -properly without protection of dc->writeback_lock. The writeback rate -calculation might not be extremely accurate but good enough for writeback -I/O throttle. - -By removing mutex dc->writeback_lock, we can avoid a deadlock. And further -more, avoid lock contention between kworker update_writeback_rate() and -btree operations on dc->writeback_lock, which means a potential better I/O -latency for front end I/O requests. Because in writeback mode, front end -I/O request also needs to acquire dc->writeback_lock for btree operations. 
- -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/bcache.h | 2 +- - drivers/md/bcache/super.c | 2 +- - drivers/md/bcache/writeback.c | 6 +----- - 3 files changed, 3 insertions(+), 7 deletions(-) - -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 843877e017e1..1b6964077100 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -489,7 +489,7 @@ struct cache_set { - - struct bcache_device **devices; - struct list_head cached_devs; -- uint64_t cached_dev_sectors; -+ atomic64_t cached_dev_sectors; - struct closure caching; - - struct closure sb_write; -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index b4d28928dec5..879e1a135180 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -847,7 +847,7 @@ static void calc_cached_dev_sectors(struct cache_set *c) - list_for_each_entry(dc, &c->cached_devs, list) - sectors += bdev_sectors(dc->bdev); - -- c->cached_dev_sectors = sectors; -+ atomic64_set(&c->cached_dev_sectors, sectors); - } - - void bch_cached_dev_run(struct cached_dev *dc) -diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c -index 56a37884ca8b..cec10e6345af 100644 ---- a/drivers/md/bcache/writeback.c -+++ b/drivers/md/bcache/writeback.c -@@ -27,7 +27,7 @@ static void __update_writeback_rate(struct cached_dev *dc) - uint64_t cache_dirty_target = - div_u64(cache_sectors * dc->writeback_percent, 100); - int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev), -- c->cached_dev_sectors); -+ atomic64_read(&c->cached_dev_sectors)); - - /* - * PI controller: -@@ -92,14 +92,10 @@ static void update_writeback_rate(struct work_struct *work) - struct cached_dev, - writeback_rate_update); - -- down_read(&dc->writeback_lock); -- - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent) - __update_writeback_rate(dc); - -- up_read(&dc->writeback_lock); -- - schedule_delayed_work(&dc->writeback_rate_update, - 
dc->writeback_rate_update_seconds * HZ); - } --- -2.13.6 - diff --git a/for-test/0001-bcache-fix-potential-deadlock-when-journal-space-is-.patch b/for-test/0001-bcache-fix-potential-deadlock-when-journal-space-is-.patch deleted file mode 100644 index 86b875e..0000000 --- a/for-test/0001-bcache-fix-potential-deadlock-when-journal-space-is-.patch +++ /dev/null @@ -1,283 +0,0 @@ -From d1676c794076bd4807a5091142ef13fffc1fae42 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Wed, 26 Dec 2018 15:57:58 +0800 -Subject: [PATCH] bcache: fix potential deadlock when journal space is full - -When journal space is full, and all entries are referenced and no one -can be reclaimed, if the key of last journal entry makes bcache b+tree -splitted, there might be a busy loop or deadlock happens. - -When busy loop happens, people may observe kernel messages like, -(following kernel message copied from Stefan Priebe's report) -2018-08-06 02:08:06 BUG: workqueue lockup - pool cpus=1 node=0 -flags=0x1 nice=0 stuck for 51s! -2018-08-06 02:08:06 pending: memcg_kmem_cache_create_func -2018-08-06 02:08:06 delayed: memcg_kmem_cache_create_func -2018-08-06 02:08:06 workqueue bcache: flags=0x8 -2018-08-06 02:08:06 pwq 22: cpus=11 node=0 flags=0x0 nice=0 active=1/256 -2018-08-06 02:08:06 in-flight: 1764369:bch_data_insert_keys [bcache] -2018-08-06 02:08:06 pwq 18: cpus=9 node=0 flags=0x1 nice=0 -active=256/256 MAYDAY -2018-08-06 02:08:06 in-flight: 1765894:bch_data_insert_keys -[bcache], 1765908:bch_data_insert_keys [bcache], -1765931:bch_data_insert_keys [bcache], 1765984:bch_data_insert_keys -[bcache], 1765815:bch_data_insert_keys [bcache], - -And when deadlock happens, a kernel panic might be triggered by soft -lockup. - -Here let me explain how such problem happens. -- Journal space is full filled. - Normally it won't happen, but if there are too many small writes - (e.g. 4KB or smaller write size) it is possible to full fill all the - journal buckets. 
-- Last entry in journal cause btree split - In bch_data_insert_keys(), the inserting key is added into journal by - bch_journal(), then inserted to btree by bch_btree_insert(). If key - insert is done, drops the journal entry reference counter. If the - journal reference counter reaches 0, it means this entry has no need - and can be freed. - If the last journal entry added into journal space, then following - bch_btree_insert() inserts the key into b+tree and causes the tree - to split, bch_journal_meta() will be called to journal new btree root - and uuid bucket. It means more journal space is required but there is - no journal space to allocate for meta data keys. -- rw_lock dependence - When the key of last journal entry cuases btree to split, during the - btree split, rw_lock will be hold for some btree level lock. And this - level lock will continue to be hold untill there is available journal - space for jouranling metadata, that is to say, wait for all journal - entries in last journal bucket to be freed. If one of the key - journaled in last journal bucket depneds on the same level lock which - is hold by btree split code, a potential deadlock will happen. - -Note: only leaf node update will go into journal, btree internal node -update won't be journaled. During btree split, only btree root location -and uuid bucket index are journaled, the btree root node and uuid -bucket are not journal. - -To fix such no-journal-space-after-btree-split issue, there are two -different methods (the complexed one and the simple one). -- The complexed one - The idea is similar to jbd2. Add session concept to bch journal, - calculate conservative journal space usage for each key to insert, - and reserved the conservative journal space in each session. Then if - btree split happens, the required extra jouranl space is reserved - already and no dealock will happen. 
-- The simple one - Reserve 256 bytes journal space (size of two full jsets), the space is - only used for 1) bch_journal_meta() from bch_btree_set_root() during - btree split and 2) all other journal space are full filled. Normal - journaling will never occupy such reserved space, so there is always - available journal space for journaling keys of uuid bucket and btree - root. - -Add session for bcache journal needs a lot of effort, bcache journal is -quite simple, it doesn't deserve such complicated code. The simple -solution works fine for current bcache journal code, therefore I decide -to fix the no-journal-space-after-btree-split issue by reserving 256 -bytes journal space. - -Is 256 bytes of reserved space enough and safe? Yes, let me explain -why here. -- For normal keys journaling, if there is no free journal space, the - requestor will sleep on closure_sync() and be woken up when available - space is reclaimed by journal_reclaim(). They won't use the reserved - space. -- The reserved area is for no-journal-space-after-btree-split issue, for - other condition it is OK to full fill the reserved area too. Because - the requestor will sleep without holding b+tree level lock, and will - be woken up when there is available journal space. -- Indeed currently only reserve 2 keys space is sufficient, but maybe - in future we may change the split code to journal more meta keys, so - reserve 256 bytes (4 keys) should be enough in reasonable future. - -The reason to reserve 256 bytes is, the reserved area should be smaller -than bucket size, so I choose half size of the smallest bucket size as -the reserved area size, which is 256 bytes. 
- -Signed-off-by: Coly Li <colyli@suse.de> -Reported-by: Stefan Priebe <s.priebe@profihost.ag> ---- - drivers/md/bcache/journal.c | 109 ++++++++++++++++++++++++++++++++++++++++---- - drivers/md/bcache/journal.h | 3 ++ - 2 files changed, 104 insertions(+), 8 deletions(-) - -diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index 522c7426f3a0..3cf5f5abb20b 100644 ---- a/drivers/md/bcache/journal.c -+++ b/drivers/md/bcache/journal.c -@@ -489,12 +489,28 @@ static void do_journal_discard(struct cache *ca) - } - } - -+static inline bool last_available_journal_bucket(struct cache_set *c) -+{ -+ struct cache *ca; -+ unsigned int iter; -+ struct journal_device *ja; -+ -+ for_each_cache(ca, c, iter) { -+ ja = &ca->journal; -+ if ((ja->cur_idx + 1) % ca->sb.njournal_buckets == -+ ja->last_idx) -+ return true; -+ } -+ -+ return false; -+} -+ - static void journal_reclaim(struct cache_set *c) - { - struct bkey *k = &c->journal.key; - struct cache *ca; - uint64_t last_seq; -- unsigned int iter, n = 0; -+ unsigned int iter, n = 0, do_wakeup = 0; - atomic_t p __maybe_unused; - - atomic_long_inc(&c->reclaim); -@@ -518,8 +534,18 @@ static void journal_reclaim(struct cache_set *c) - for_each_cache(ca, c, iter) - do_journal_discard(ca); - -- if (c->journal.blocks_free) -+ if (!last_available_journal_bucket(c) && -+ c->journal.blocks_free) { -+ do_wakeup = 1; - goto out; -+ } -+ -+ if (last_available_journal_bucket(c) && -+ ((c->journal.blocks_free * c->sb.block_size) > -+ BCH_JOURNAL_RESERVE)) { -+ do_wakeup = 1; -+ goto out; -+ } - - /* - * Allocate: -@@ -543,10 +569,12 @@ static void journal_reclaim(struct cache_set *c) - bkey_init(k); - SET_KEY_PTRS(k, n); - -- if (n) -+ if (n) { - c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; -+ do_wakeup = 1; -+ } - out: -- if (!journal_full(&c->journal)) -+ if (do_wakeup && !journal_full(&c->journal)) - __closure_wake_up(&c->journal.wait); - } - -@@ -707,6 +735,73 @@ static void journal_try_write(struct 
cache_set *c) - } - } - -+static bool no_journal_wait(struct cache_set *c, -+ size_t sectors, -+ int nkeys) -+{ -+ bool is_journal_meta = (nkeys == 0) ? true : false; -+ -+ if (c->journal.blocks_free) { -+ if (!last_available_journal_bucket(c) && -+ sectors <= min_t(size_t, -+ c->journal.blocks_free * c->sb.block_size, -+ PAGE_SECTORS << JSET_BITS)) { -+ return true; -+ } -+ -+ if (last_available_journal_bucket(c)) { -+ if (!is_journal_meta && -+ sectors <= min_t(size_t, -+ (c->journal.blocks_free * -+ c->sb.block_size) - -+ BCH_JOURNAL_RESERVE, -+ PAGE_SECTORS << JSET_BITS)) { -+ return true; -+ } -+ if (is_journal_meta && -+ sectors <= min_t(size_t, -+ c->journal.blocks_free * -+ c->sb.block_size, -+ PAGE_SECTORS << JSET_BITS)) { -+ return true; -+ } -+ } -+ } -+ -+ return false; -+} -+ -+static inline bool should_try_write(struct cache_set *c, -+ struct journal_write *w, -+ int nkeys) -+{ -+ bool is_journal_meta = (nkeys == 0) ? true : false; -+ size_t sectors = __set_blocks(w->data, w->data->keys, -+ block_bytes(c)) * c->sb.block_size; -+ -+ -+ if (!last_available_journal_bucket(c) && -+ !journal_full(&c->journal)) -+ return true; -+ -+ if (last_available_journal_bucket(c)) { -+ if (!is_journal_meta && -+ sectors <= min_t(size_t, -+ (c->journal.blocks_free * -+ c->sb.block_size) - -+ BCH_JOURNAL_RESERVE, -+ PAGE_SECTORS << JSET_BITS)) -+ return true; -+ -+ if (is_journal_meta && !journal_full(&c->journal)) -+ return true; -+ } -+ -+ return false; -+} -+ -+ -+ - static struct journal_write *journal_wait_for_write(struct cache_set *c, - unsigned int nkeys) - __acquires(&c->journal.lock) -@@ -725,15 +820,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, - sectors = __set_blocks(w->data, w->data->keys + nkeys, - block_bytes(c)) * c->sb.block_size; - -- if (sectors <= min_t(size_t, -- c->journal.blocks_free * c->sb.block_size, -- PAGE_SECTORS << JSET_BITS)) -+ if (no_journal_wait(c, sectors, nkeys)) - return w; - - if (wait) - 
closure_wait(&c->journal.wait, &cl); - -- if (!journal_full(&c->journal)) { -+ if (should_try_write(c, w, nkeys)) { - if (wait) - trace_bcache_journal_entry_full(c); - -diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h -index 66f0facff84b..346d00543ee7 100644 ---- a/drivers/md/bcache/journal.h -+++ b/drivers/md/bcache/journal.h -@@ -159,6 +159,9 @@ struct journal_device { - - #define JOURNAL_PIN 20000 - -+/* reesrve 256 bytes in journal area for meta data jouranling */ -+#define BCH_JOURNAL_RESERVE 256 -+ - #define journal_full(j) \ - (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1) - --- -2.16.4 - diff --git a/for-test/0001-bcache-fix-potential-journal-dead-lock.patch b/for-test/0001-bcache-fix-potential-journal-dead-lock.patch deleted file mode 100644 index 7021968..0000000 --- a/for-test/0001-bcache-fix-potential-journal-dead-lock.patch +++ /dev/null @@ -1,257 +0,0 @@ -From 3e1a79802e63605c4d9849093ce769a41b12c8b9 Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Fri, 30 Nov 2018 19:14:17 +0800 -Subject: [PATCH] bcache: fix potential journal dead lock - -Signed-off-by: Coly Li <colyli@suse.de> ---- - drivers/md/bcache/btree.c | 2 +- - drivers/md/bcache/journal.c | 110 ++++++++++++++++++++++++++++++++++++++++---- - drivers/md/bcache/journal.h | 12 ++++- - drivers/md/bcache/request.c | 6 ++- - 4 files changed, 115 insertions(+), 15 deletions(-) - -diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c -index 3f4211b5cd33..2e671f39acf6 100644 ---- a/drivers/md/bcache/btree.c -+++ b/drivers/md/bcache/btree.c -@@ -2327,7 +2327,7 @@ void bch_btree_set_root(struct btree *b) - - b->c->root = b; - -- bch_journal_meta(b->c, &cl); -+ __bch_journal_meta(b->c, &cl, BCH_JOURNAL_USE_RESV); - closure_sync(&cl); - } - -diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index 522c7426f3a0..932fe2e45e8c 100644 ---- a/drivers/md/bcache/journal.c -+++ b/drivers/md/bcache/journal.c -@@ -489,6 +489,22 @@ static void 
do_journal_discard(struct cache *ca) - } - } - -+static inline bool last_available_journal_bucket(struct cache_set *c) -+{ -+ struct cache *ca; -+ unsigned int iter; -+ struct journal_device *ja; -+ -+ for_each_cache(ca, c, iter) { -+ ja = &ca->journal; -+ if ((ja->cur_idx + 1) % ca->sb.njournal_buckets == -+ ja->last_idx) -+ return true; -+ } -+ -+ return false; -+} -+ - static void journal_reclaim(struct cache_set *c) - { - struct bkey *k = &c->journal.key; -@@ -518,7 +534,15 @@ static void journal_reclaim(struct cache_set *c) - for_each_cache(ca, c, iter) - do_journal_discard(ca); - -- if (c->journal.blocks_free) -+ /* -+ * If this is the last available journal bucket, we -+ * need to take care of the reseved journal space to -+ * avoid no-space-after-btree-split journal dead lock. -+ */ -+ if ((!last_available_journal_bucket(c) && -+ c->journal.blocks_free > 0) || -+ (last_available_journal_bucket(c) && -+ c->journal.blocks_free > BCH_JOURNAL_BLKS_RESV)) - goto out; - - /* -@@ -707,8 +731,67 @@ static void journal_try_write(struct cache_set *c) - } - } - -+static inline bool no_journal_wait(struct cache_set *c, -+ size_t sectors, -+ int journal_flags) -+{ -+ if (!last_available_journal_bucket(c) && -+ (sectors < min_t(size_t, -+ c->journal.blocks_free * c->sb.block_size, -+ PAGE_SECTORS << JSET_BITS))) -+ return true; -+ -+ /* -+ * We are at the last available journal bucket, now take -+ * care of the access to reserved journal space, to avoid -+ * the no-space-after-btree-split journal dead lock. 
-+ */ -+ if (last_available_journal_bucket(c)) { -+ /* no access to reserved journal space */ -+ if (!(journal_flags & BCH_JOURNAL_USE_RESV) && -+ sectors < min_t(size_t, -+ c->journal.blocks_free * c->sb.block_size -+ - BCH_JOURNAL_BLKS_RESV, -+ PAGE_SECTORS << JSET_BITS)) -+ return true; -+ -+ /* has access to reserved journal space */ -+ if ((journal_flags & BCH_JOURNAL_USE_RESV) && -+ sectors <= min_t(size_t, -+ c->journal.blocks_free * c->sb.block_size, -+ PAGE_SECTORS << JSET_BITS)) -+ return true; -+ } -+ -+ return false; -+} -+ -+static inline bool try_write(struct cache_set *c, -+ int journal_flags) -+{ -+ if (!last_available_journal_bucket(c) && -+ fifo_free(&(&c->journal)->pin) > (JOURNAL_PIN_RSV + 1)) -+ return true; -+ -+ if (last_available_journal_bucket(c)) { -+ /* no access to reserved journal space and pin */ -+ if (!(journal_flags & BCH_JOURNAL_USE_RESV) && -+ fifo_free(&(&c->journal)->pin) > (JOURNAL_PIN_RSV + 1) && -+ c->journal.blocks_free > BCH_JOURNAL_BLKS_RESV) -+ return true; -+ -+ /* has access to reserved journal space and pin*/ -+ if ((journal_flags & BCH_JOURNAL_USE_RESV) && -+ !journal_full(&c->journal)) -+ return true; -+ } -+ -+ return false; -+} -+ - static struct journal_write *journal_wait_for_write(struct cache_set *c, -- unsigned int nkeys) -+ unsigned int nkeys, -+ int journal_flags) - __acquires(&c->journal.lock) - { - size_t sectors; -@@ -725,15 +808,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, - sectors = __set_blocks(w->data, w->data->keys + nkeys, - block_bytes(c)) * c->sb.block_size; - -- if (sectors <= min_t(size_t, -- c->journal.blocks_free * c->sb.block_size, -- PAGE_SECTORS << JSET_BITS)) -+ if (no_journal_wait(c, sectors, journal_flags)) - return w; - - if (wait) - closure_wait(&c->journal.wait, &cl); - -- if (!journal_full(&c->journal)) { -+ if (try_write(c, journal_flags)){ - if (wait) - trace_bcache_journal_entry_full(c); - -@@ -782,7 +863,8 @@ static void 
journal_write_work(struct work_struct *work) - - atomic_t *bch_journal(struct cache_set *c, - struct keylist *keys, -- struct closure *parent) -+ struct closure *parent, -+ int journal_flags) - { - struct journal_write *w; - atomic_t *ret; -@@ -790,7 +872,7 @@ atomic_t *bch_journal(struct cache_set *c, - if (!CACHE_SYNC(&c->sb)) - return NULL; - -- w = journal_wait_for_write(c, bch_keylist_nkeys(keys)); -+ w = journal_wait_for_write(c, bch_keylist_nkeys(keys), journal_flags); - - memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys)); - w->data->keys += bch_keylist_nkeys(keys); -@@ -814,18 +896,26 @@ atomic_t *bch_journal(struct cache_set *c, - return ret; - } - --void bch_journal_meta(struct cache_set *c, struct closure *cl) -+void __bch_journal_meta(struct cache_set *c, -+ struct closure *cl, -+ int journal_flags) - { - struct keylist keys; - atomic_t *ref; - - bch_keylist_init(&keys); - -- ref = bch_journal(c, &keys, cl); -+ ref = bch_journal(c, &keys, cl, journal_flags); - if (ref) - atomic_dec_bug(ref); - } - -+void bch_journal_meta(struct cache_set *c, -+ struct closure *cl) -+{ -+ __bch_journal_meta(c, cl, 0); -+} -+ - void bch_journal_free(struct cache_set *c) - { - free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); -diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h -index 66f0facff84b..4488ea37d666 100644 ---- a/drivers/md/bcache/journal.h -+++ b/drivers/md/bcache/journal.h -@@ -157,7 +157,11 @@ struct journal_device { - #define journal_pin_cmp(c, l, r) \ - (fifo_idx(&(c)->journal.pin, (l)) > fifo_idx(&(c)->journal.pin, (r))) - --#define JOURNAL_PIN 20000 -+#define BCH_JOURNAL_USE_RESV 1 -+ -+#define JOURNAL_PIN 20000 -+#define JOURNAL_PIN_RSV 1 -+#define BCH_JOURNAL_BLKS_RESV (PAGE_SECTORS << JSET_BITS) - - #define journal_full(j) \ - (!(j)->blocks_free || fifo_free(&(j)->pin) <= 1) -@@ -169,9 +173,13 @@ struct keylist; - - atomic_t *bch_journal(struct cache_set *c, - struct keylist *keys, -- struct closure 
*parent); -+ struct closure *parent, -+ int journal_flags); - void bch_journal_next(struct journal *j); - void bch_journal_mark(struct cache_set *c, struct list_head *list); -+void __bch_journal_meta(struct cache_set *c, -+ struct closure *cl, -+ int journal_flags); - void bch_journal_meta(struct cache_set *c, struct closure *cl); - int bch_journal_read(struct cache_set *c, struct list_head *list); - int bch_journal_replay(struct cache_set *c, struct list_head *list); -diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c -index 3bf35914bb57..20e31ecb73c0 100644 ---- a/drivers/md/bcache/request.c -+++ b/drivers/md/bcache/request.c -@@ -75,8 +75,10 @@ static void bch_data_insert_keys(struct closure *cl) - #endif - - if (!op->replace) -- journal_ref = bch_journal(op->c, &op->insert_keys, -- op->flush_journal ? cl : NULL); -+ journal_ref = bch_journal(op->c, -+ &op->insert_keys, -+ op->flush_journal ? cl : NULL, -+ 0); - - ret = bch_btree_insert(op->c, &op->insert_keys, - journal_ref, replace_key); --- -2.16.4 - diff --git a/for-test/0001-bcache-fix-wrong-usage-use-after-freed-on-keylist-in.patch b/for-test/0001-bcache-fix-wrong-usage-use-after-freed-on-keylist-in.patch deleted file mode 100644 index d5a2a96..0000000 --- a/for-test/0001-bcache-fix-wrong-usage-use-after-freed-on-keylist-in.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 7f0d1afc8b7336b9576f5f4c60d338dcdeabe5e1 Mon Sep 17 00:00:00 2001 -From: Shenghui Wang <shhuiw@foxmail.com> -Date: Fri, 8 Feb 2019 19:02:42 +0800 -Subject: [PATCH] bcache: fix wrong usage use-after-freed on keylist in - out_nocoalesce branch of btree_gc_coalesce - -Elements of keylist should be accessed before the list is freed. -Move bch_keylist_free() calling after the while loop to avoid wrong -content accessed. 
- -Signed-off-by: Shenghui Wang <shhuiw@foxmail.com> ---- - drivers/md/bcache/btree.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c -index 23cb1dc7296b..13671f381c44 100644 ---- a/drivers/md/bcache/btree.c -+++ b/drivers/md/bcache/btree.c -@@ -1475,11 +1475,11 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, - - out_nocoalesce: - closure_sync(&cl); -- bch_keylist_free(&keylist); - - while ((k = bch_keylist_pop(&keylist))) - if (!bkey_cmp(k, &ZERO_KEY)) - atomic_dec(&b->c->prio_blocked); -+ bch_keylist_free(&keylist); - - for (i = 0; i < nodes; i++) - if (!IS_ERR_OR_NULL(new_nodes[i])) { --- -2.16.4 - diff --git a/for-test/0001-bcache-more-readable-in-bch_journal.patch b/for-test/0001-bcache-more-readable-in-bch_journal.patch deleted file mode 100644 index 1e13afa..0000000 --- a/for-test/0001-bcache-more-readable-in-bch_journal.patch +++ /dev/null @@ -1,65 +0,0 @@ -From b488ea2ffce499d0c683a7f09681d9f8a0fa5d8e Mon Sep 17 00:00:00 2001 -From: Coly Li <colyli@suse.de> -Date: Tue, 7 Nov 2017 00:47:51 +0800 -Subject: [PATCH] bcache: more readable in bch_journal() - ---- - drivers/md/bcache/journal.c | 18 +++++++++++------- - 1 file changed, 11 insertions(+), 7 deletions(-) - -diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index 02a98ddb592d..29e1c519b934 100644 ---- a/drivers/md/bcache/journal.c -+++ b/drivers/md/bcache/journal.c -@@ -41,7 +41,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, - struct jset *j, *data = ca->set->journal.w[0].data; - struct closure cl; - unsigned len, left, offset = 0; -- int ret = 0; -+ int ret = -EIO; - sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); - - closure_init_stack(&cl); -@@ -65,12 +65,15 @@ reread: left = ca->sb.bucket_size - offset; - closure_bio_submit(bio, &cl); - closure_sync(&cl); - -+ if (bio->bi_status) -+ return ret; -+ - /* This function could be 
simpler now since we no longer write - * journal entries that overlap bucket boundaries; this means - * the start of a bucket will always have a valid journal entry - * if it has any journal entries at all. - */ -- -+ ret = 0; - j = data; - while (len) { - struct list_head *where; -@@ -769,16 +772,17 @@ atomic_t *bch_journal(struct cache_set *c, - - if (parent) { - closure_wait(&w->wait, parent); -+ /* unlock */ - journal_try_write(c); -- } else if (!w->dirty) { -+ return ret; -+ } -+ -+ if (!w->dirty) { - w->dirty = true; - schedule_delayed_work(&c->journal.work, - msecs_to_jiffies(c->journal_delay_ms)); -- spin_unlock(&c->journal.lock); -- } else { -- spin_unlock(&c->journal.lock); - } -- -+ spin_unlock(&c->journal.lock); - - return ret; - } --- -2.13.6 - |