From: Alex Tomas Some transaction checkpointing improvements for the JBD commit phase. Decent speedups: creation of 500K files in single dir (with htree, of course): before: 4m16.094s, 4m12.035s, 4m11.911s after: 1m41.364s, 1m43.461s, 1m45.189s removal of 500K files in single dir: before: 43m50.161s after: 38m45.510s - Make __log_wait_for_space() recalculate the needed blocks because journal free space changes during commit - Make log_do_checkpoint() start scanning from the oldest transaction - Make log_do_checkpoint() stop scanning if a transaction gets dropped. The caller will reevaluate the transaction state and decide whether more space needs to be generated in the log. The effect of this is to smooth out the I/O patterns and avoid the huge stop-and-go which currently happens when forced checkpointing writes out and waits upon 3/4 of the journal's size worth of data. fs/jbd/checkpoint.c | 34 +++++++++++++++++++++------------- fs/jbd/journal.c | 4 ++-- fs/jbd/transaction.c | 9 ++------- include/linux/jbd.h | 17 +++++++++++++++-- 4 files changed, 40 insertions(+), 24 deletions(-) diff -puN fs/jbd/checkpoint.c~jbd-commit-tricks fs/jbd/checkpoint.c --- 25/fs/jbd/checkpoint.c~jbd-commit-tricks 2003-07-04 20:11:49.000000000 -0700 +++ 25-akpm/fs/jbd/checkpoint.c 2003-07-04 21:19:25.000000000 -0700 @@ -75,11 +75,12 @@ static int __try_to_free_cp_buf(struct j * Called under j-state_lock *only*. It will be unlocked if we have to wait * for a checkpoint to free up some space in the log. 
*/ - -void __log_wait_for_space(journal_t *journal, int nblocks) +void __log_wait_for_space(journal_t *journal) { + int nblocks; assert_spin_locked(&journal->j_state_lock); + nblocks = jbd_space_needed(journal); while (__log_space_left(journal) < nblocks) { if (journal->j_flags & JFS_ABORT) return; @@ -91,9 +92,10 @@ void __log_wait_for_space(journal_t *jou * were waiting for the checkpoint lock */ spin_lock(&journal->j_state_lock); + nblocks = jbd_space_needed(journal); if (__log_space_left(journal) < nblocks) { spin_unlock(&journal->j_state_lock); - log_do_checkpoint(journal, nblocks); + log_do_checkpoint(journal); spin_lock(&journal->j_state_lock); } up(&journal->j_checkpoint_sem); @@ -225,7 +227,7 @@ __flush_batch(journal_t *journal, struct */ static int __flush_buffer(journal_t *journal, struct journal_head *jh, struct buffer_head **bhs, int *batch_count, - int *drop_count) + int *drop_count, int *freed) { struct buffer_head *bh = jh2bh(jh); int ret = 0; @@ -262,6 +264,8 @@ static int __flush_buffer(journal_t *jou if (__try_to_free_cp_buf(jh)) { (*drop_count)++; ret = last_buffer; + if (last_buffer) + *freed = 1; } } return ret; @@ -279,9 +283,7 @@ static int __flush_buffer(journal_t *jou * * The journal should be locked before calling this function. */ - -/* @@@ `nblocks' is unused. Should it be used? 
*/ -int log_do_checkpoint(journal_t *journal, int nblocks) +int log_do_checkpoint(journal_t *journal) { int result; int batch_count = 0; @@ -314,8 +316,9 @@ int log_do_checkpoint(journal_t *journal int drop_count = 0; int cleanup_ret, retry = 0; tid_t this_tid; + int freed = 0; - transaction = journal->j_checkpoint_transactions->t_cpnext; + transaction = journal->j_checkpoint_transactions; this_tid = transaction->t_tid; jh = transaction->t_checkpoint_list; last_jh = jh->b_cpprev; @@ -333,12 +336,17 @@ int log_do_checkpoint(journal_t *journal break; } retry = __flush_buffer(journal, jh, bhs, &batch_count, - &drop_count); + &drop_count, &freed); } while (jh != last_jh && !retry); - if (batch_count) { + + if (batch_count) __flush_batch(journal, bhs, &batch_count); - continue; - } + /* + * transaction was freed, so we return to check whether + * sufficient log space was made available. + */ + if (freed) + break; if (retry) continue; @@ -352,7 +360,7 @@ int log_do_checkpoint(journal_t *journal * If someone cleaned up this transaction while we slept, we're * done */ - if (journal->j_checkpoint_transactions->t_cpnext != transaction) + if (journal->j_checkpoint_transactions != transaction) continue; /* * Maybe it's a new transaction, but it fell at the same diff -puN fs/jbd/transaction.c~jbd-commit-tricks fs/jbd/transaction.c --- 25/fs/jbd/transaction.c~jbd-commit-tricks 2003-07-04 20:11:49.000000000 -0700 +++ 25-akpm/fs/jbd/transaction.c 2003-07-04 21:53:03.000000000 -0700 @@ -206,15 +206,10 @@ repeat_locked: * Also, this test is inconsitent with the matching one in * journal_extend(). 
*/ - needed = journal->j_max_transaction_buffers; - if (journal->j_committing_transaction) - needed += journal->j_committing_transaction-> - t_outstanding_credits; - - if (__log_space_left(journal) < needed) { + if (__log_space_left(journal) < jbd_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); spin_unlock(&transaction->t_handle_lock); - __log_wait_for_space(journal, needed); + __log_wait_for_space(journal); goto repeat_locked; } diff -puN fs/jbd/journal.c~jbd-commit-tricks fs/jbd/journal.c --- 25/fs/jbd/journal.c~jbd-commit-tricks 2003-07-04 20:11:49.000000000 -0700 +++ 25-akpm/fs/jbd/journal.c 2003-07-04 20:11:49.000000000 -0700 @@ -1076,7 +1076,7 @@ void journal_destroy(journal_t *journal) spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - log_do_checkpoint(journal, 1); + log_do_checkpoint(journal); spin_lock(&journal->j_list_lock); } @@ -1284,7 +1284,7 @@ int journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - err = log_do_checkpoint(journal, journal->j_maxlen); + err = log_do_checkpoint(journal); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); diff -puN include/linux/jbd.h~jbd-commit-tricks include/linux/jbd.h --- 25/include/linux/jbd.h~jbd-commit-tricks 2003-07-04 20:11:49.000000000 -0700 +++ 25-akpm/include/linux/jbd.h 2003-07-04 21:19:25.000000000 -0700 @@ -992,9 +992,9 @@ int log_start_commit(journal_t *journal, int __log_start_commit(journal_t *journal, tid_t tid); int journal_start_commit(journal_t *journal, tid_t *tid); int log_wait_commit(journal_t *journal, tid_t tid); -int log_do_checkpoint(journal_t *journal, int nblocks); +int log_do_checkpoint(journal_t *journal); -void __log_wait_for_space(journal_t *journal, int nblocks); +void __log_wait_for_space(journal_t *journal); extern void 
__journal_drop_transaction(journal_t *, transaction_t *); extern int cleanup_journal_tail(journal_t *); @@ -1054,6 +1054,19 @@ static inline int tid_geq(tid_t x, tid_t extern int journal_blocks_per_page(struct inode *inode); /* + * Return the minimum number of blocks which must be free in the journal + * before a new transaction may be started. Must be called under j_state_lock. + */ +static inline int jbd_space_needed(journal_t *journal) +{ + int nblocks = journal->j_max_transaction_buffers; + if (journal->j_committing_transaction) + nblocks += journal->j_committing_transaction-> + t_outstanding_credits; + return nblocks; +} + +/* * Definitions which augment the buffer_head layer */ _