aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2019-03-02 18:36:56 +0800
committerColy Li <colyli@suse.de>2019-03-02 18:36:56 +0800
commit66f637bea17f25b4c4e6ce9f188c28c8b0ad2e7c (patch)
tree6fac1d73caad01590ef542431a524288ef7fe962
parente7a223030c43cab8000b66e35b841ce5a3ba9b8a (diff)
downloadbcache-patches-66f637bea17f25b4c4e6ce9f188c28c8b0ad2e7c.tar.gz
for-test: update journal deadlock fixes
-rw-r--r--for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch51
-rw-r--r--for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch (renamed from for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch)4
-rw-r--r--for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch (renamed from for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch)4
-rw-r--r--for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch (renamed from for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch)18
-rw-r--r--for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch (renamed from for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch)30
-rw-r--r--for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch (renamed from for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch)41
6 files changed, 92 insertions, 56 deletions
diff --git a/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch b/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch
new file mode 100644
index 0000000..358c3c0
--- /dev/null
+++ b/for-test/jouranl-deadlock/0001-bcache-acquire-c-journal.lock-in-bch_btree_leaf_dirt.patch
@@ -0,0 +1,51 @@
+From 3c7e66546d18ead01bd821fa07f3ca2c73a9d964 Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Sat, 2 Mar 2019 18:19:08 +0800
+Subject: [PATCH 1/6] bcache: acquire c->journal.lock in bch_btree_leaf_dirty()
+
+In bch_btree_leaf_dirty(), when increasing the bcache journal pin counter,
+the current code calls atomic_inc(w->journal) directly. This is
+problematic, because it may cause the following code in
+journal.c:journal_reclaim() to not work properly,
+ 610 while (!atomic_read(&fifo_front(&c->journal.pin)))
+ 611 fifo_pop(&c->journal.pin, p);
+
+The above code piece is protected by spinlock c->journal.lock, and
+the atomic counter w->journal in btree.c:bch_btree_leaf_dirty() is one
+of the nodes from c->journal.pin. If the above while() loop just happens
+to reach a fifo node which is w->journal in bch_btree_leaf_dirty(),
+it is possible that between lines 610 and 611 the counter w->journal
+is increased but still popped off in journal_reclaim(). Then the journal
+jset which w->journal references in bch_btree_leaf_dirty() gets lost.
+
+If the system crashes or reboots before the bkeys of the lost jset are
+flushed back to the bcache btree node, journal_replay() after the reboot
+may complain that some journal entries are lost and fail to register the
+
+Such a race condition happens very rarely. I observed the issue when
+I changed the number of journal buckets to 3, which leaves only a limited
+number of jsets available. Then it is possible to observe journal
+replay failure due to lost journal jset(s).
+
+Signed-off-by: Coly Li <colyli@suse.de>
+---
+ drivers/md/bcache/btree.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
+index 23cb1dc7296b..ac1b9159402e 100644
+--- a/drivers/md/bcache/btree.c
++++ b/drivers/md/bcache/btree.c
+@@ -551,7 +551,9 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
+
+ if (!w->journal) {
+ w->journal = journal_ref;
++ spin_lock(&b->c->journal.lock);
+ atomic_inc(w->journal);
++ spin_unlock(&b->c->journal.lock);
+ }
+ }
+
+--
+2.16.4
+
diff --git a/for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch b/for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch
index 8abdfa8..1212a23 100644
--- a/for-test/jouranl-deadlock/0001-bcache-move-definition-of-int-ret-out-of-macro-read_.patch
+++ b/for-test/jouranl-deadlock/0002-bcache-move-definition-of-int-ret-out-of-macro-read_.patch
@@ -1,7 +1,7 @@
-From 9267af6ab8e85a8c5fd836fa24e01d5962aaa01e Mon Sep 17 00:00:00 2001
+From da41d81e0abd211d2990d848cd33744ff335cd43 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 27 Feb 2019 18:10:48 +0800
-Subject: [PATCH 1/5] bcache: move definition of 'int ret' out of macro
+Subject: [PATCH 2/6] bcache: move definition of 'int ret' out of macro
read_bucket()
'int ret' is defined as a local variable inside macro read_bucket().
diff --git a/for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch b/for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch
index f246cd9..3353e92 100644
--- a/for-test/jouranl-deadlock/0002-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch
+++ b/for-test/jouranl-deadlock/0003-bcache-never-set-0-to-KEY_PTRS-of-jouranl-key-in-jou.patch
@@ -1,7 +1,7 @@
-From c0b8b24f794ec53aa501c28278c2f10711f6cccf Mon Sep 17 00:00:00 2001
+From d8c81f7cdb63bc3a2b00a8a9e5e9b4783e42c702 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 27 Feb 2019 20:22:23 +0800
-Subject: [PATCH 2/5] bcache: never set 0 to KEY_PTRS of jouranl key in
+Subject: [PATCH 3/6] bcache: never set 0 to KEY_PTRS of jouranl key in
journal_reclaim()
In journal_reclaim() ja->cur_idx of each cache will be update to
diff --git a/for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch b/for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch
index 8ef3bfe..45f0823 100644
--- a/for-test/jouranl-deadlock/0003-bcache-reload-jouranl-key-information-during-journal.patch
+++ b/for-test/jouranl-deadlock/0004-bcache-reload-jouranl-key-information-during-journal.patch
@@ -1,7 +1,7 @@
-From 2414835ec02c84f592361eda29a6c45112b9cd5d Mon Sep 17 00:00:00 2001
+From 1ff320546f894a6067c6a73bfaa937fca20308de Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 27 Feb 2019 20:32:22 +0800
-Subject: [PATCH 3/5] bcache: reload jouranl key information during journal
+Subject: [PATCH 4/6] bcache: reload jouranl key information during journal
replay
When bcache journal initiates during running cache set, cache set
@@ -47,14 +47,14 @@ in bch_journal_read() before replying journal by bch_journal_replay().
Signed-off-by: Coly Li <colyli@suse.de>
---
- drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 89 insertions(+)
+ drivers/md/bcache/journal.c | 87 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 87 insertions(+)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 5180bed911ef..9c43e0d57f9b 100644
+index 5180bed911ef..a6deb16c15c8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
-@@ -143,6 +143,91 @@ reread: left = ca->sb.bucket_size - offset;
+@@ -143,6 +143,89 @@ reread: left = ca->sb.bucket_size - offset;
return ret;
}
@@ -122,9 +122,8 @@ index 5180bed911ef..9c43e0d57f9b 100644
+
+ offset += blocks * ca->sb.block_size;
+ len -= blocks * ca->sb.block_size;
-+ j = ((void *) j) + blocks * ca->sb.block_size;
++ j = ((void *) j) + blocks * block_bytes(ca);
+ }
-+
+ }
+out:
+ c->journal.blocks_free =
@@ -132,7 +131,6 @@ index 5180bed911ef..9c43e0d57f9b 100644
+ used_blocks;
+
+ k->ptr[n++] = MAKE_PTR(0, bucket, ca->sb.nr_this_dev);
-+
+ }
+
+ BUG_ON(n == 0);
@@ -146,7 +144,7 @@ index 5180bed911ef..9c43e0d57f9b 100644
int bch_journal_read(struct cache_set *c, struct list_head *list)
{
#define read_bucket(b) \
-@@ -268,6 +353,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
+@@ -268,6 +351,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
struct journal_replay,
list)->j.seq;
diff --git a/for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch b/for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch
index a9e1ff1..82d1cea 100644
--- a/for-test/jouranl-deadlock/0004-bcache-fix-journal-deadlock-during-jouranl-replay.patch
+++ b/for-test/jouranl-deadlock/0005-bcache-fix-journal-deadlock-during-jouranl-replay.patch
@@ -1,7 +1,7 @@
-From a29ac97f8fa25a195d594a6c1c4333cc924d6b59 Mon Sep 17 00:00:00 2001
+From ee8cbff3518dcaf67c16cff0cefe2a4424573bff Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 27 Feb 2019 20:35:02 +0800
-Subject: [PATCH 4/5] bcache: fix journal deadlock during jouranl replay
+Subject: [PATCH 5/6] bcache: fix journal deadlock during jouranl replay
A deadlock of bcache jouranling may happen during journal replay. Such
deadlock happens when,
@@ -60,10 +60,10 @@ Signed-off-by: Coly Li <colyli@suse.de>
2 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index 9c43e0d57f9b..d48fd57397ff 100644
+index a6deb16c15c8..c60a702f53a9 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
-@@ -417,6 +417,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
+@@ -415,6 +415,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
uint64_t start = i->j.last_seq, end = i->j.seq, n = start;
struct keylist keylist;
@@ -72,7 +72,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
list_for_each_entry(i, list, list) {
BUG_ON(i->pin && atomic_read(i->pin) != 1);
-@@ -450,6 +452,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
+@@ -448,6 +450,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
pr_info("journal replay done, %i keys in %i entries, seq %llu",
keys, entries, end);
err:
@@ -80,7 +80,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
while (!list_empty(list)) {
i = list_first_entry(list, struct journal_replay, list);
list_del(&i->list);
-@@ -579,6 +582,22 @@ static void do_journal_discard(struct cache *ca)
+@@ -577,6 +580,22 @@ static void do_journal_discard(struct cache *ca)
}
}
@@ -103,7 +103,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
static void journal_reclaim(struct cache_set *c)
{
struct bkey *k = &c->journal.key;
-@@ -586,6 +605,7 @@ static void journal_reclaim(struct cache_set *c)
+@@ -584,6 +603,7 @@ static void journal_reclaim(struct cache_set *c)
uint64_t last_seq;
unsigned int iter, n = 0;
atomic_t p __maybe_unused;
@@ -111,7 +111,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
atomic_long_inc(&c->reclaim);
-@@ -608,8 +628,13 @@ static void journal_reclaim(struct cache_set *c)
+@@ -606,8 +626,13 @@ static void journal_reclaim(struct cache_set *c)
for_each_cache(ca, c, iter)
do_journal_discard(ca);
@@ -126,7 +126,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
/*
* Allocate:
-@@ -634,9 +659,10 @@ static void journal_reclaim(struct cache_set *c)
+@@ -632,9 +657,10 @@ static void journal_reclaim(struct cache_set *c)
bkey_init(k);
SET_KEY_PTRS(k, n);
c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
@@ -138,7 +138,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
__closure_wake_up(&c->journal.wait);
}
-@@ -694,6 +720,21 @@ static void journal_write_unlock(struct closure *cl)
+@@ -692,6 +718,21 @@ static void journal_write_unlock(struct closure *cl)
spin_unlock(&c->journal.lock);
}
@@ -160,7 +160,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
static void journal_write_unlocked(struct closure *cl)
__releases(c->journal.lock)
{
-@@ -712,7 +753,7 @@ static void journal_write_unlocked(struct closure *cl)
+@@ -710,7 +751,7 @@ static void journal_write_unlocked(struct closure *cl)
if (!w->need_write) {
closure_return_with_destructor(cl, journal_write_unlock);
return;
@@ -169,14 +169,14 @@ index 9c43e0d57f9b..d48fd57397ff 100644
journal_reclaim(c);
spin_unlock(&c->journal.lock);
-@@ -800,6 +841,52 @@ static void journal_try_write(struct cache_set *c)
+@@ -798,6 +839,52 @@ static void journal_try_write(struct cache_set *c)
}
}
+static bool no_journal_wait(struct cache_set *c,
+ size_t sectors)
+{
-+ int last = last_available_journal_bucket(c);
++ bool last = last_available_journal_bucket(c);
+ size_t reserved_sectors = 0;
+ size_t n = min_t(size_t,
+ c->journal.blocks_free * c->sb.block_size,
@@ -222,7 +222,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned int nkeys)
__acquires(&c->journal.lock)
-@@ -818,15 +905,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
+@@ -816,15 +903,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
sectors = __set_blocks(w->data, w->data->keys + nkeys,
block_bytes(c)) * c->sb.block_size;
@@ -240,7 +240,7 @@ index 9c43e0d57f9b..d48fd57397ff 100644
if (wait)
trace_bcache_journal_entry_full(c);
-@@ -935,6 +1020,7 @@ int bch_journal_alloc(struct cache_set *c)
+@@ -933,6 +1018,7 @@ int bch_journal_alloc(struct cache_set *c)
INIT_DELAYED_WORK(&j->work, journal_write_work);
c->journal_delay_ms = 100;
diff --git a/for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch b/for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch
index d1cd1bd..c955391 100644
--- a/for-test/jouranl-deadlock/0005-bcache-reserve-space-for-journal_meta-in-run-time.patch
+++ b/for-test/jouranl-deadlock/0006-bcache-reserve-space-for-journal_meta-in-run-time.patch
@@ -1,7 +1,7 @@
-From 4a1a6857a7cdd0018cbb5062eae625fb85e2ad6d Mon Sep 17 00:00:00 2001
+From 60b326d839c8df0528c9567db590173a8d11060b Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Thu, 28 Feb 2019 20:29:00 +0800
-Subject: [PATCH 5/5] bcache: reserve space for journal_meta() in run time
+Subject: [PATCH 6/6] bcache: reserve space for journal_meta() in run time
Another journal deadlock of bcache jouranling can happen in normal
bcache runtime. It is very rare to happen but there are people report
@@ -57,15 +57,15 @@ extend it then.
Signed-off-by: Coly Li <colyli@suse.de>
---
- drivers/md/bcache/journal.c | 95 +++++++++++++++++++++++++++++++++------------
+ drivers/md/bcache/journal.c | 89 +++++++++++++++++++++++++++++++++------------
drivers/md/bcache/journal.h | 1 +
- 2 files changed, 72 insertions(+), 24 deletions(-)
+ 2 files changed, 66 insertions(+), 24 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
-index d48fd57397ff..9d42f072176c 100644
+index c60a702f53a9..6aa68ab7cd78 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
-@@ -631,7 +631,7 @@ static void journal_reclaim(struct cache_set *c)
+@@ -629,7 +629,7 @@ static void journal_reclaim(struct cache_set *c)
last = last_available_journal_bucket(c);
if ((!last && c->journal.blocks_free) ||
(last && (c->journal.blocks_free * c->sb.block_size) >
@@ -74,7 +74,7 @@ index d48fd57397ff..9d42f072176c 100644
do_wakeup = true;
goto out;
}
-@@ -720,18 +720,27 @@ static void journal_write_unlock(struct closure *cl)
+@@ -718,18 +718,27 @@ static void journal_write_unlock(struct closure *cl)
spin_unlock(&c->journal.lock);
}
@@ -85,7 +85,7 @@ index d48fd57397ff..9d42f072176c 100644
{
- if (unlikely(journal_full(&c->journal)))
- return true;
-+ int last = last_available_journal_bucket(c);
++ bool last = last_available_journal_bucket(c);
- if (unlikely(last_available_journal_bucket(c) &&
- (!c->journal.in_replay) &&
@@ -110,7 +110,7 @@ index d48fd57397ff..9d42f072176c 100644
return false;
}
-@@ -753,7 +762,9 @@ static void journal_write_unlocked(struct closure *cl)
+@@ -751,7 +760,9 @@ static void journal_write_unlocked(struct closure *cl)
if (!w->need_write) {
closure_return_with_destructor(cl, journal_write_unlock);
return;
@@ -121,17 +121,7 @@ index d48fd57397ff..9d42f072176c 100644
journal_reclaim(c);
spin_unlock(&c->journal.lock);
-@@ -805,6 +816,9 @@ static void journal_write_unlocked(struct closure *cl)
- /* If KEY_PTRS(k) == 0, this jset gets lost in air */
- BUG_ON(i == 0);
-
-+ /* If i == 0, this jset gets lost in air */
-+ BUG_ON(i == 0);
-+
- atomic_dec_bug(&fifo_back(&c->journal.pin));
- bch_journal_next(&c->journal);
- journal_reclaim(c);
-@@ -842,16 +856,28 @@ static void journal_try_write(struct cache_set *c)
+@@ -840,16 +851,26 @@ static void journal_try_write(struct cache_set *c)
}
static bool no_journal_wait(struct cache_set *c,
@@ -140,16 +130,14 @@ index d48fd57397ff..9d42f072176c 100644
+ int nkeys)
{
+ bool is_journal_meta = (nkeys == 0) ? true : false;
- int last = last_available_journal_bucket(c);
+ bool last = last_available_journal_bucket(c);
size_t reserved_sectors = 0;
- size_t n = min_t(size_t,
- c->journal.blocks_free * c->sb.block_size,
- PAGE_SECTORS << JSET_BITS);
+ size_t n;
+
-+ if (!last) {
-+ reserved_sectors = 0;
-+ } else {
++ if (unlikely(last)) {
+ if (!is_journal_meta)
+ reserved_sectors = BCH_JOURNAL_RESERVE +
+ BCH_JOURNAL_RPLY_RESERVE;
@@ -166,7 +154,7 @@ index d48fd57397ff..9d42f072176c 100644
if (sectors <= (n - reserved_sectors))
return true;
-@@ -860,26 +886,47 @@ static bool no_journal_wait(struct cache_set *c,
+@@ -858,26 +879,46 @@ static bool no_journal_wait(struct cache_set *c,
}
static bool should_try_write(struct cache_set *c,
@@ -192,7 +180,6 @@ index d48fd57397ff..9d42f072176c 100644
+ BUG_ON(empty_jset);
return true;
+ }
-+
- /* the check in no_journal_wait exceeds BCH_JOURNAL_RPLY_RESERVE */
- if (w->data->keys == 0)
@@ -220,7 +207,7 @@ index d48fd57397ff..9d42f072176c 100644
if (sectors <= (n - reserved_sectors))
return true;
-@@ -905,13 +952,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
+@@ -903,13 +944,13 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
sectors = __set_blocks(w->data, w->data->keys + nkeys,
block_bytes(c)) * c->sb.block_size;