diff options
author | Coly Li <colyli@suse.de> | 2019-03-02 18:36:56 +0800 |
---|---|---|
committer | Coly Li <colyli@suse.de> | 2019-03-02 18:36:56 +0800 |
commit | 66f637bea17f25b4c4e6ce9f188c28c8b0ad2e7c (patch) | |
tree | 6fac1d73caad01590ef542431a524288ef7fe962 | |
parent | e7a223030c43cab8000b66e35b841ce5a3ba9b8a (diff) | |
download | bcache-patches-66f637bea17f25b4c4e6ce9f188c28c8b0ad2e7c.tar.gz |
for-test: update journal deadlock fixes
-rw-r--r-- | for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch | 51 | ||||
-rw-r--r-- | for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch (renamed from for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch) | 4 | ||||
-rw-r--r-- | for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch (renamed from for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch) | 4 | ||||
-rw-r--r-- | for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch (renamed from for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch) | 18 | ||||
-rw-r--r-- | for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch (renamed from for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch) | 30 | ||||
-rw-r--r-- | for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch (renamed from for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch) | 41 |
6 files changed, 92 insertions, 56 deletions
diff --git a/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch b/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch new file mode 100644 index 0000000..358c3c0 --- /dev/null +++ b/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch @@ -0,0 +1,51 @@ +From 3c7e66546d18ead01bd821fa07f3ca2c73a9d964 Mon Sep 17 00:00:00 2001 +From: Coly Li <colyli@suse.de> +Date: Sat, 2 Mar 2019 18:19:08 +0800 +Subject: [PATCH 1/6] bcache: acquire c->journal.lock in bch_btree_leaf_dirty() + +In bch_btree_leaf_dirty(), when increasing the bcache journal pin counter, +the current code uses atomic_inc(w->journal) directly. This is problematic, +because it may cause the following code in journal.c:journal_reclaim() +to not work properly, + 610 while (!atomic_read(&fifo_front(&c->journal.pin))) + 611 fifo_pop(&c->journal.pin, p); + +The above code piece is protected by spinlock c->journal.lock, and +the atomic counter w->journal in btree.c:bch_btree_leaf_dirty() is one +of the nodes from c->journal.pin. If the above while() loop just happens +to reach a fifo node which is w->journal in bch_btree_leaf_dirty(), +it is possible that between lines 610 and 611 the counter w->journal +is increased but still popped off in journal_reclaim(). Then the journal jset +which w->journal referenced in bch_btree_leaf_dirty() gets lost. + +If the system crashes or reboots before the bkeys of the lost jset are flushed +back to the bcache btree node, journal_replay() after the reboot may complain +that some journal entries are lost and fail to register the cache set. + +Such a race condition is very rare. I observed the issue when +I modified the number of journal buckets to 3, which makes only a limited +number of jsets available. Then it is possible to observe journal +replay failure due to lost journal jset(s). 
+ +Signed-off-by: Coly Li <colyli@suse.de> +--- + drivers/md/bcache/btree.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index 23cb1dc7296b..ac1b9159402e 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -551,7 +551,9 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) + + if (!w->journal) { + w->journal = journal_ref; ++ spin_lock(&b->c->journal.lock); + atomic_inc(w->journal); ++ spin_unlock(&b->c->journal.lock); + } + } + +-- +2.16.4 + diff --git a/for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch b/for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch index 8abdfa8..1212a23 100644 --- a/for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch +++ b/for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch @@ -1,7 +1,7 @@ -From 9267af6ab8e85a8c5fd836fa24e01d5962aaa01e Mon Sep 17 00:00:00 2001 +From da41d81e0abd211d2990d848cd33744ff335cd43 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 27 Feb 2019 18:10:48 +0800 -Subject: [PATCH 1/5] bcache: move definition of 'int ret' out of macro +Subject: [PATCH 2/6] bcache: move definition of 'int ret' out of macro read_bucket() 'int ret' is defined as a local variable inside macro read_bucket(). 
diff --git a/for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch b/for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch index f246cd9..3353e92 100644 --- a/for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch +++ b/for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch @@ -1,7 +1,7 @@ -From c0b8b24f794ec53aa501c28278c2f10711f6cccf Mon Sep 17 00:00:00 2001 +From d8c81f7cdb63bc3a2b00a8a9e5e9b4783e42c702 Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 27 Feb 2019 20:22:23 +0800 -Subject: [PATCH 2/5] bcache: never set 0 to KEY_PTRS of jouranl key in +Subject: [PATCH 3/6] bcache: never set 0 to KEY_PTRS of jouranl key in journal_reclaim() In journal_reclaim() ja->cur_idx of each cache will be update to diff --git a/for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch b/for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch index 8ef3bfe..45f0823 100644 --- a/for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch +++ b/for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch @@ -1,7 +1,7 @@ -From 2414835ec02c84f592361eda29a6c45112b9cd5d Mon Sep 17 00:00:00 2001 +From 1ff320546f894a6067c6a73bfaa937fca20308de Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 27 Feb 2019 20:32:22 +0800 -Subject: [PATCH 3/5] bcache: reload jouranl key information during journal +Subject: [PATCH 4/6] bcache: reload jouranl key information during journal replay When bcache journal initiates during running cache set, cache set @@ -47,14 +47,14 @@ in bch_journal_read() before replying journal by bch_journal_replay(). 
Signed-off-by: Coly Li <colyli@suse.de> --- - drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) + drivers/md/bcache/journal.c | 87 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 87 insertions(+) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index 5180bed911ef..9c43e0d57f9b 100644 +index 5180bed911ef..a6deb16c15c8 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c -@@ -143,6 +143,91 @@ reread: left = ca->sb.bucket_size - offset; +@@ -143,6 +143,89 @@ reread: left = ca->sb.bucket_size - offset; return ret; } @@ -122,9 +122,8 @@ index 5180bed911ef..9c43e0d57f9b 100644 + + offset += blocks * ca->sb.block_size; + len -= blocks * ca->sb.block_size; -+ j = ((void *) j) + blocks * ca->sb.block_size; ++ j = ((void *) j) + blocks * block_bytes(ca); + } -+ + } +out: + c->journal.blocks_free = @@ -132,7 +131,6 @@ index 5180bed911ef..9c43e0d57f9b 100644 + used_blocks; + + k->ptr[n++] = MAKE_PTR(0, bucket, ca->sb.nr_this_dev); -+ + } + + BUG_ON(n == 0); @@ -146,7 +144,7 @@ index 5180bed911ef..9c43e0d57f9b 100644 int bch_journal_read(struct cache_set *c, struct list_head *list) { #define read_bucket(b) \ -@@ -268,6 +353,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) +@@ -268,6 +351,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) struct journal_replay, list)->j.seq; diff --git a/for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch b/for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch index a9e1ff1..82d1cea 100644 --- a/for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch +++ b/for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch @@ -1,7 +1,7 @@ -From a29ac97f8fa25a195d594a6c1c4333cc924d6b59 Mon Sep 17 00:00:00 2001 +From ee8cbff3518dcaf67c16cff0cefe2a4424573bff Mon Sep 
17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Wed, 27 Feb 2019 20:35:02 +0800 -Subject: [PATCH 4/5] bcache: fix journal deadlock during jouranl replay +Subject: [PATCH 5/6] bcache: fix journal deadlock during jouranl replay A deadlock of bcache jouranling may happen during journal replay. Such deadlock happens when, @@ -60,10 +60,10 @@ Signed-off-by: Coly Li <colyli@suse.de> 2 files changed, 97 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index 9c43e0d57f9b..d48fd57397ff 100644 +index a6deb16c15c8..c60a702f53a9 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c -@@ -417,6 +417,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) +@@ -415,6 +415,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) uint64_t start = i->j.last_seq, end = i->j.seq, n = start; struct keylist keylist; @@ -72,7 +72,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 list_for_each_entry(i, list, list) { BUG_ON(i->pin && atomic_read(i->pin) != 1); -@@ -450,6 +452,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) +@@ -448,6 +450,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) pr_info("journal replay done, %i keys in %i entries, seq %llu", keys, entries, end); err: @@ -80,7 +80,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 while (!list_empty(list)) { i = list_first_entry(list, struct journal_replay, list); list_del(&i->list); -@@ -579,6 +582,22 @@ static void do_journal_discard(struct cache *ca) +@@ -577,6 +580,22 @@ static void do_journal_discard(struct cache *ca) } } @@ -103,7 +103,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 static void journal_reclaim(struct cache_set *c) { struct bkey *k = &c->journal.key; -@@ -586,6 +605,7 @@ static void journal_reclaim(struct cache_set *c) +@@ -584,6 +603,7 @@ static void journal_reclaim(struct cache_set *c) uint64_t last_seq; unsigned int iter, n = 0; atomic_t p __maybe_unused; @@ 
-111,7 +111,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 atomic_long_inc(&c->reclaim); -@@ -608,8 +628,13 @@ static void journal_reclaim(struct cache_set *c) +@@ -606,8 +626,13 @@ static void journal_reclaim(struct cache_set *c) for_each_cache(ca, c, iter) do_journal_discard(ca); @@ -126,7 +126,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 /* * Allocate: -@@ -634,9 +659,10 @@ static void journal_reclaim(struct cache_set *c) +@@ -632,9 +657,10 @@ static void journal_reclaim(struct cache_set *c) bkey_init(k); SET_KEY_PTRS(k, n); c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; @@ -138,7 +138,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 __closure_wake_up(&c->journal.wait); } -@@ -694,6 +720,21 @@ static void journal_write_unlock(struct closure *cl) +@@ -692,6 +718,21 @@ static void journal_write_unlock(struct closure *cl) spin_unlock(&c->journal.lock); } @@ -160,7 +160,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 static void journal_write_unlocked(struct closure *cl) __releases(c->journal.lock) { -@@ -712,7 +753,7 @@ static void journal_write_unlocked(struct closure *cl) +@@ -710,7 +751,7 @@ static void journal_write_unlocked(struct closure *cl) if (!w->need_write) { closure_return_with_destructor(cl, journal_write_unlock); return; @@ -169,14 +169,14 @@ index 9c43e0d57f9b..d48fd57397ff 100644 journal_reclaim(c); spin_unlock(&c->journal.lock); -@@ -800,6 +841,52 @@ static void journal_try_write(struct cache_set *c) +@@ -798,6 +839,52 @@ static void journal_try_write(struct cache_set *c) } } +static bool no_journal_wait(struct cache_set *c, + size_t sectors) +{ -+ int last = last_available_journal_bucket(c); ++ bool last = last_available_journal_bucket(c); + size_t reserved_sectors = 0; + size_t n = min_t(size_t, + c->journal.blocks_free * c->sb.block_size, @@ -222,7 +222,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 static struct journal_write *journal_wait_for_write(struct cache_set *c, unsigned int nkeys) __acquires(&c->journal.lock) -@@ -818,15 +905,13 @@ 
static struct journal_write *journal_wait_for_write(struct cache_set *c, +@@ -816,15 +903,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, sectors = __set_blocks(w->data, w->data->keys + nkeys, block_bytes(c)) * c->sb.block_size; @@ -240,7 +240,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644 if (wait) trace_bcache_journal_entry_full(c); -@@ -935,6 +1020,7 @@ int bch_journal_alloc(struct cache_set *c) +@@ -933,6 +1018,7 @@ int bch_journal_alloc(struct cache_set *c) INIT_DELAYED_WORK(&j->work, journal_write_work); c->journal_delay_ms = 100; diff --git a/for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch b/for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch index d1cd1bd..c955391 100644 --- a/for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch +++ b/for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch @@ -1,7 +1,7 @@ -From 4a1a6857a7cdd0018cbb5062eae625fb85e2ad6d Mon Sep 17 00:00:00 2001 +From 60b326d839c8df0528c9567db590173a8d11060b Mon Sep 17 00:00:00 2001 From: Coly Li <colyli@suse.de> Date: Thu, 28 Feb 2019 20:29:00 +0800 -Subject: [PATCH 5/5] bcache: reserve space for journal_meta() in run time +Subject: [PATCH 6/6] bcache: reserve space for journal_meta() in run time Another journal deadlock of bcache jouranling can happen in normal bcache runtime. It is very rare to happen but there are people report @@ -57,15 +57,15 @@ extend it then. 
Signed-off-by: Coly Li <colyli@suse.de> --- - drivers/md/bcache/journal.c | 95 +++++++++++++++++++++++++++++++++------------ + drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++------------ drivers/md/bcache/journal.h | 1 + - 2 files changed, 72 insertions(+), 24 deletions(-) + 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c -index d48fd57397ff..9d42f072176c 100644 +index c60a702f53a9..6aa68ab7cd78 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c -@@ -631,7 +631,7 @@ static void journal_reclaim(struct cache_set *c) +@@ -629,7 +629,7 @@ static void journal_reclaim(struct cache_set *c) last = last_available_journal_bucket(c); if ((!last && c->journal.blocks_free) || (last && (c->journal.blocks_free * c->sb.block_size) > @@ -74,7 +74,7 @@ index d48fd57397ff..9d42f072176c 100644 do_wakeup = true; goto out; } -@@ -720,18 +720,27 @@ static void journal_write_unlock(struct closure *cl) +@@ -718,18 +718,27 @@ static void journal_write_unlock(struct closure *cl) spin_unlock(&c->journal.lock); } @@ -85,7 +85,7 @@ index d48fd57397ff..9d42f072176c 100644 { - if (unlikely(journal_full(&c->journal))) - return true; -+ int last = last_available_journal_bucket(c); ++ bool last = last_available_journal_bucket(c); - if (unlikely(last_available_journal_bucket(c) && - (!c->journal.in_replay) && @@ -110,7 +110,7 @@ index d48fd57397ff..9d42f072176c 100644 return false; } -@@ -753,7 +762,9 @@ static void journal_write_unlocked(struct closure *cl) +@@ -751,7 +760,9 @@ static void journal_write_unlocked(struct closure *cl) if (!w->need_write) { closure_return_with_destructor(cl, journal_write_unlock); return; @@ -121,17 +121,7 @@ index d48fd57397ff..9d42f072176c 100644 journal_reclaim(c); spin_unlock(&c->journal.lock); -@@ -805,6 +816,9 @@ static void journal_write_unlocked(struct closure *cl) - /* If KEY_PTRS(k) == 0, this jset gets lost in air */ - BUG_ON(i == 0); - -+ /* 
If i == 0, this jset gets lost in air */ -+ BUG_ON(i == 0); -+ - atomic_dec_bug(&fifo_back(&c->journal.pin)); - bch_journal_next(&c->journal); - journal_reclaim(c); -@@ -842,16 +856,28 @@ static void journal_try_write(struct cache_set *c) +@@ -840,16 +851,26 @@ static void journal_try_write(struct cache_set *c) } static bool no_journal_wait(struct cache_set *c, @@ -140,16 +130,14 @@ index d48fd57397ff..9d42f072176c 100644 + int nkeys) { + bool is_journal_meta = (nkeys == 0) ? true : false; - int last = last_available_journal_bucket(c); + bool last = last_available_journal_bucket(c); size_t reserved_sectors = 0; - size_t n = min_t(size_t, - c->journal.blocks_free * c->sb.block_size, - PAGE_SECTORS << JSET_BITS); + size_t n; + -+ if (!last) { -+ reserved_sectors = 0; -+ } else { ++ if (unlikely(last)) { + if (!is_journal_meta) + reserved_sectors = BCH_JOURNAL_RESERVE + + BCH_JOURNAL_RPLY_RESERVE; @@ -166,7 +154,7 @@ index d48fd57397ff..9d42f072176c 100644 if (sectors <= (n - reserved_sectors)) return true; -@@ -860,26 +886,47 @@ static bool no_journal_wait(struct cache_set *c, +@@ -858,26 +879,46 @@ static bool no_journal_wait(struct cache_set *c, } static bool should_try_write(struct cache_set *c, @@ -192,7 +180,6 @@ index d48fd57397ff..9d42f072176c 100644 + BUG_ON(empty_jset); return true; + } -+ - /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */ - if (w->data->keys == 0) @@ -220,7 +207,7 @@ index d48fd57397ff..9d42f072176c 100644 if (sectors <= (n - reserved_sectors)) return true; -@@ -905,13 +952,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, +@@ -903,13 +944,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, sectors = __set_blocks(w->data, w->data->keys + nkeys, block_bytes(c)) * c->sb.block_size; |