buffer_heads and journal_heads are joined at the hip.  We need a lock to
protect the joint and its refcounts.

JBD currently uses the global journal_datalist_lock spinlock for that.
Change it to use one bit in bh->b_state (BH_JournalHead), taken as a bit
spinlock via the new jbd_lock_bh_journal_head() helpers.

 fs/jbd/checkpoint.c  |    4 +-
 fs/jbd/commit.c      |    4 +-
 fs/jbd/journal.c     |  100 ++++++++++++++++++++++++++-------------------
 fs/jbd/transaction.c |   10 ++---
 include/linux/jbd.h  |   12 +++++-
 5 files changed, 71 insertions(+), 59 deletions(-)

diff -puN fs/jbd/journal.c~jbd-040-journal_add_journal_head-locking fs/jbd/journal.c
--- 25/fs/jbd/journal.c~jbd-040-journal_add_journal_head-locking	2003-06-04 01:39:41.000000000 -0700
+++ 25-akpm/fs/jbd/journal.c	2003-06-04 01:39:41.000000000 -0700
@@ -297,7 +297,7 @@ restart:
 			BUFFER_TRACE(bh, "data writeout complete: unfile");
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = NULL;
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 			goto restart;
 		}
@@ -1710,59 +1710,49 @@ static void journal_free_journal_head(st
 struct journal_head *journal_add_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh;
+	struct journal_head *new_jh = NULL;
 
-	spin_lock(&journal_datalist_lock);
+repeat:
+	if (!buffer_jbd(bh)) {
+		new_jh = journal_alloc_journal_head();
+		memset(new_jh, 0, sizeof(*new_jh));
+	}
+
+	jbd_lock_bh_journal_head(bh);
 	if (buffer_jbd(bh)) {
 		jh = bh2jh(bh);
 	} else {
 		J_ASSERT_BH(bh,
 			(atomic_read(&bh->b_count) > 0) ||
 			(bh->b_page && bh->b_page->mapping));
-		spin_unlock(&journal_datalist_lock);
-		jh = journal_alloc_journal_head();
-		memset(jh, 0, sizeof(*jh));
-		spin_lock(&journal_datalist_lock);
-		if (buffer_jbd(bh)) {
-			/* Someone did it for us! */
-			J_ASSERT_BH(bh, bh->b_private != NULL);
-			journal_free_journal_head(jh);
-			jh = bh->b_private;
-		} else {
-			set_bit(BH_JBD, &bh->b_state);
-			bh->b_private = jh;
-			jh->b_bh = bh;
-			atomic_inc(&bh->b_count);
-			BUFFER_TRACE(bh, "added journal_head");
+		if (!new_jh) {
+			jbd_unlock_bh_journal_head(bh);
+			goto repeat;
 		}
+
+		jh = new_jh;
+		new_jh = NULL;		/* We consumed it */
+		set_buffer_jbd(bh);
+		bh->b_private = jh;
+		jh->b_bh = bh;
+		get_bh(bh);
+		BUFFER_TRACE(bh, "added journal_head");
 	}
 	jh->b_jcount++;
-	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_journal_head(bh);
+	if (new_jh)
+		journal_free_journal_head(new_jh);
 	return bh->b_private;
 }
 
-/*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.  If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- *
- * Requires journal_datalist_lock.
- */
-void __journal_remove_journal_head(struct buffer_head *bh)
+static void __journal_remove_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh = bh2jh(bh);
 
-	assert_spin_locked(&journal_datalist_lock);
 	J_ASSERT_JH(jh, jh->b_jcount >= 0);
-	atomic_inc(&bh->b_count);
+
+	get_bh(bh);
 	if (jh->b_jcount == 0) {
 		if (jh->b_transaction == NULL &&
 				jh->b_next_transaction == NULL &&
@@ -1772,7 +1762,7 @@ void __journal_remove_journal_head(struc
 			BUFFER_TRACE(bh, "remove journal_head");
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
-			clear_bit(BH_JBD, &bh->b_state);
+			clear_buffer_jbd(bh);
 			__brelse(bh);
 			journal_free_journal_head(jh);
 		} else {
@@ -1781,26 +1771,38 @@ void __journal_remove_journal_head(struc
 	}
 }
 
+/*
+ * journal_remove_journal_head(): if the buffer isn't attached to a transaction
+ * and has a zero b_jcount then remove and release its journal_head.  If we did
+ * see that the buffer is not used by any transaction we also "logically"
+ * decrement ->b_count.
+ *
+ * We in fact take an additional increment on ->b_count as a convenience,
+ * because the caller usually wants to do additional things with the bh
+ * after calling here.
+ * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
+ * time.  Once the caller has run __brelse(), the buffer is eligible for
+ * reaping by try_to_free_buffers().
+ */
+void journal_remove_journal_head(struct buffer_head *bh)
+{
+	jbd_lock_bh_journal_head(bh);
+	__journal_remove_journal_head(bh);
+	jbd_unlock_bh_journal_head(bh);
+}
+
 void journal_unlock_journal_head(struct journal_head *jh)
 {
-	spin_lock(&journal_datalist_lock);
+	struct buffer_head *bh = jh2bh(jh);
+
+	jbd_lock_bh_journal_head(bh);
 	J_ASSERT_JH(jh, jh->b_jcount > 0);
 	--jh->b_jcount;
 	if (!jh->b_jcount && !jh->b_transaction) {
-		struct buffer_head *bh;
-		bh = jh2bh(jh);
 		__journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
-
-	spin_unlock(&journal_datalist_lock);
-}
-
-void journal_remove_journal_head(struct buffer_head *bh)
-{
-	spin_lock(&journal_datalist_lock);
-	__journal_remove_journal_head(bh);
-	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_journal_head(bh);
 }
 
 /*
diff -puN include/linux/jbd.h~jbd-040-journal_add_journal_head-locking include/linux/jbd.h
--- 25/include/linux/jbd.h~jbd-040-journal_add_journal_head-locking	2003-06-04 01:39:41.000000000 -0700
+++ 25-akpm/include/linux/jbd.h	2003-06-04 01:39:41.000000000 -0700
@@ -293,6 +293,7 @@ enum jbd_state_bits {
 	BH_RevokeValid,		/* Revoked flag is valid */
 	BH_JBDDirty,		/* Is dirty but journaled */
 	BH_State,		/* Pins most journal_head state */
+	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 };
 
 BUFFER_FNS(JBD, jbd)
@@ -326,6 +327,16 @@ static inline void jbd_unlock_bh_state(s
 	bit_spin_unlock(BH_State, &bh->b_state);
 }
 
+static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_lock(BH_JournalHead, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_unlock(BH_JournalHead, &bh->b_state);
+}
+
 #define HAVE_JOURNAL_CALLBACK_STATUS
 /**
  * struct journal_callback - Base structure for callback information.
@@ -956,7 +967,6 @@ extern int journal_force_commit(journ
 extern struct journal_head *journal_add_journal_head(struct buffer_head *bh);
 extern void journal_remove_journal_head(struct buffer_head *bh);
-extern void __journal_remove_journal_head(struct buffer_head *bh);
 extern void journal_unlock_journal_head(struct journal_head *jh);
 
 /*
diff -puN fs/jbd/checkpoint.c~jbd-040-journal_add_journal_head-locking fs/jbd/checkpoint.c
--- 25/fs/jbd/checkpoint.c~jbd-040-journal_add_journal_head-locking	2003-06-04 01:39:41.000000000 -0700
+++ 25-akpm/fs/jbd/checkpoint.c	2003-06-04 01:39:41.000000000 -0700
@@ -59,7 +59,7 @@ static int __try_to_free_cp_buf(struct j
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		__journal_remove_checkpoint(jh);
-		__journal_remove_journal_head(bh);
+		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
 		ret = 1;
@@ -156,7 +156,7 @@ static int __cleanup_transaction(journal
 			if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
 				BUFFER_TRACE(bh, "remove from checkpoint");
 				__journal_remove_checkpoint(jh);
-				__journal_remove_journal_head(bh);
+				journal_remove_journal_head(bh);
 				__brelse(bh);
 				ret = 1;
 			}
diff -puN fs/jbd/commit.c~jbd-040-journal_add_journal_head-locking fs/jbd/commit.c
--- 25/fs/jbd/commit.c~jbd-040-journal_add_journal_head-locking	2003-06-04 01:39:41.000000000 -0700
+++ 25-akpm/fs/jbd/commit.c	2003-06-04 01:39:41.000000000 -0700
@@ -212,7 +212,7 @@ write_out_data_locked:
 				BUFFER_TRACE(bh, "writeout complete: unfile");
 				__journal_unfile_buffer(jh);
 				jh->b_transaction = NULL;
-				__journal_remove_journal_head(bh);
+				journal_remove_journal_head(bh);
 				__brelse(bh);
 			}
 		}
@@ -692,7 +692,7 @@ skip_commit: /* The journal should be un
 		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 		__journal_unfile_buffer(jh);
 		jh->b_transaction = 0;
-		__journal_remove_journal_head(bh);
+		journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 	spin_unlock(&journal_datalist_lock);
diff -puN fs/jbd/transaction.c~jbd-040-journal_add_journal_head-locking fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~jbd-040-journal_add_journal_head-locking	2003-06-04 01:39:41.000000000 -0700
+++ 25-akpm/fs/jbd/transaction.c	2003-06-04 01:39:41.000000000 -0700
@@ -1204,7 +1204,7 @@ void journal_forget (handle_t *handle, s
 		if (jh->b_cp_transaction) {
 			__journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal_datalist_lock);
@@ -1608,7 +1608,7 @@ static inline int __journal_try_to_free_
 			JBUFFER_TRACE(jh, "release data");
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = 0;
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
@@ -1616,7 +1616,7 @@ static inline int __journal_try_to_free_
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
 			__journal_remove_checkpoint(jh);
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	}
@@ -1710,7 +1710,7 @@ static int dispose_buffer(struct journal
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
-		__journal_remove_journal_head(bh);
+		journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 	spin_unlock(&journal_datalist_lock);
@@ -2051,7 +2051,7 @@ void journal_refile_buffer(struct journa
 	bh = jh2bh(jh);
 
 	__journal_refile_buffer(jh);
-	__journal_remove_journal_head(bh);
+	journal_remove_journal_head(bh);
 	spin_unlock(&journal_datalist_lock);
 	__brelse(bh);
_
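
For reference, a minimal sketch of the new locking rule (illustrative only, not
part of the patch): anything that inspects or changes the bh <-> journal_head
attachment takes the BH_JournalHead bit lock through the helpers added to
include/linux/jbd.h above, instead of the old global journal_datalist_lock.
The function name grab_journal_head() below is made up for illustration.

/*
 * Illustrative sketch only - not part of the patch.  Assumes the
 * jbd_lock_bh_journal_head()/jbd_unlock_bh_journal_head() helpers and the
 * BH_JournalHead bit added above; grab_journal_head() is a hypothetical name.
 */
#include <linux/buffer_head.h>
#include <linux/jbd.h>

static struct journal_head *grab_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh = NULL;

	jbd_lock_bh_journal_head(bh);	/* bit_spin_lock on BH_JournalHead */
	if (buffer_jbd(bh)) {
		/* bh->b_private and jh->b_bh are stable while the bit is held */
		jh = bh2jh(bh);
		jh->b_jcount++;		/* pin it, as journal_add_journal_head() does */
	}
	jbd_unlock_bh_journal_head(bh);
	return jh;			/* NULL if the buffer has no journal_head */
}

A caller that gets a non-NULL jh back would later drop it with
journal_unlock_journal_head(jh), which takes the same bit lock, decrements
b_jcount and, if the count reaches zero with no transaction attached, frees
the journal_head.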