log_do_checkpoint is playing around with a transaction pointer without enough locking to ensure that it is valid. Fix that up by revalidating the transaction after acquiring the right locks. fs/jbd/checkpoint.c | 53 ++++++++++++++++++++++++++++++--------------------- fs/jbd/journal.c | 0 fs/jbd/transaction.c | 0 include/linux/jbd.h | 0 4 files changed, 32 insertions(+), 21 deletions(-) diff -puN fs/jbd/checkpoint.c~jbd-660-log_do_checkpoint-fix fs/jbd/checkpoint.c --- 25/fs/jbd/checkpoint.c~jbd-660-log_do_checkpoint-fix 2003-06-10 19:41:08.000000000 -0700 +++ 25-akpm/fs/jbd/checkpoint.c 2003-06-10 20:35:30.000000000 -0700 @@ -162,12 +162,12 @@ static int __cleanup_transaction(journal } if (jh->b_transaction != NULL) { - transaction_t *transaction = jh->b_transaction; - tid_t tid = transaction->t_tid; + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - log_start_commit(journal, transaction); + log_start_commit(journal, t); log_wait_commit(journal, tid); goto out_return_1; } @@ -213,7 +213,7 @@ __flush_batch(journal_t *journal, struct spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; - clear_bit(BH_JWrite, &bh->b_state); + clear_buffer_jwrite(bh); BUFFER_TRACE(bh, "brelse"); __brelse(bh); } @@ -290,7 +290,6 @@ static int __flush_buffer(journal_t *jou /* @@@ `nblocks' is unused. Should it be used? */ int log_do_checkpoint(journal_t *journal, int nblocks) { - transaction_t *transaction, *last_transaction, *next_transaction; int result; int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; @@ -316,20 +315,15 @@ int log_do_checkpoint(journal_t *journal * degenerates into a busy loop at unmount time. */ spin_lock(&journal->j_list_lock); -repeat: - transaction = journal->j_checkpoint_transactions; - if (transaction == NULL) - goto done; - last_transaction = transaction->t_cpprev; - next_transaction = transaction; - - do { + while (journal->j_checkpoint_transactions) { + transaction_t *transaction; struct journal_head *jh, *last_jh, *next_jh; int drop_count = 0; int cleanup_ret, retry = 0; + tid_t this_tid; - transaction = next_transaction; - next_transaction = transaction->t_cpnext; + transaction = journal->j_checkpoint_transactions->t_cpprev; + this_tid = transaction->t_tid; jh = transaction->t_checkpoint_list; last_jh = jh->b_cpprev; next_jh = jh; @@ -342,6 +336,7 @@ repeat: if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); spin_lock(&journal->j_list_lock); + retry = 1; break; } retry = __flush_buffer(journal, jh, bhs, &batch_count, @@ -349,10 +344,29 @@ repeat: } while (jh != last_jh && !retry); if (batch_count) { __flush_batch(journal, bhs, &batch_count); - goto repeat; + continue; } if (retry) - goto repeat; + continue; + + /* + * If someone emptied the checkpoint list while we slept, we're + * done. + */ + if (!journal->j_checkpoint_transactions) + break; + /* + * If someone cleaned up this transaction while we slept, we're + * done + */ + if (journal->j_checkpoint_transactions->t_cpprev != transaction) + continue; + /* + * Maybe it's a new transaction, but it fell at the same + * address + */ + if (transaction->t_tid != this_tid) + continue; /* * We have walked the whole transaction list without * finding anything to write to disk. We had better be @@ -360,10 +374,7 @@ repeat: */ cleanup_ret = __cleanup_transaction(journal, transaction); J_ASSERT(drop_count != 0 || cleanup_ret != 0); - goto repeat; /* __cleanup may have dropped lock */ - } while (transaction != last_transaction); - -done: + } spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) diff -puN fs/jbd/transaction.c~jbd-660-log_do_checkpoint-fix fs/jbd/transaction.c diff -puN fs/jbd/journal.c~jbd-660-log_do_checkpoint-fix fs/jbd/journal.c diff -puN include/linux/jbd.h~jbd-660-log_do_checkpoint-fix include/linux/jbd.h _