From: Alex Tomas We're about to remove lock_journal(), and it is lock_journal which separates the running and committing transaction's revokes on the single revoke table. So implement two revoke tables and rotate them at commit time. 25-akpm/fs/jbd/commit.c | 12 ++--- 25-akpm/fs/jbd/revoke.c | 97 ++++++++++++++++++++++++++++++++++++++------ 25-akpm/include/linux/jbd.h | 10 ++-- 3 files changed, 96 insertions(+), 23 deletions(-) diff -puN fs/jbd/revoke.c~jbd-240-dual-revoke-tables fs/jbd/revoke.c --- 25/fs/jbd/revoke.c~jbd-240-dual-revoke-tables Thu Jun 5 15:14:28 2003 +++ 25-akpm/fs/jbd/revoke.c Thu Jun 5 15:14:28 2003 @@ -129,7 +129,9 @@ repeat: record->sequence = seq; record->blocknr = blocknr; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; + spin_lock(&journal->j_revoke_lock); list_add(&record->hash, hash_list); + spin_unlock(&journal->j_revoke_lock); return 0; oom: @@ -150,12 +152,16 @@ static struct jbd_revoke_record_s *find_ hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; + spin_lock(&journal->j_revoke_lock); record = (struct jbd_revoke_record_s *) hash_list->next; while (&(record->hash) != hash_list) { - if (record->blocknr == blocknr) + if (record->blocknr == blocknr) { + spin_unlock(&journal->j_revoke_lock); return record; + } record = (struct jbd_revoke_record_s *) record->hash.next; } + spin_unlock(&journal->j_revoke_lock); return NULL; } @@ -192,27 +198,29 @@ int journal_init_revoke(journal_t *journ { int shift, tmp; - J_ASSERT (journal->j_revoke == NULL); + J_ASSERT (journal->j_revoke_table[0] == NULL); - journal->j_revoke = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke) + shift = 0; + tmp = hash_size; + while((tmp >>= 1UL) != 0UL) + shift++; + + journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[0]) return -ENOMEM; + journal->j_revoke = journal->j_revoke_table[0]; /* Check that the hash_size is a power of two */ J_ASSERT 
((hash_size & (hash_size-1)) == 0); journal->j_revoke->hash_size = hash_size; - shift = 0; - tmp = hash_size; - while((tmp >>= 1UL) != 0UL) - shift++; journal->j_revoke->hash_shift = shift; journal->j_revoke->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); if (!journal->j_revoke->hash_table) { - kmem_cache_free(revoke_table_cache, journal->j_revoke); + kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); journal->j_revoke = NULL; return -ENOMEM; } @@ -220,6 +228,37 @@ int journal_init_revoke(journal_t *journ for (tmp = 0; tmp < hash_size; tmp++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + if (!journal->j_revoke_table[1]) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); + return -ENOMEM; + } + + journal->j_revoke = journal->j_revoke_table[1]; + + /* Check that the hash_size is a power of two */ + J_ASSERT ((hash_size & (hash_size-1)) == 0); + + journal->j_revoke->hash_size = hash_size; + + journal->j_revoke->hash_shift = shift; + + journal->j_revoke->hash_table = + kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); + if (!journal->j_revoke->hash_table) { + kfree(journal->j_revoke_table[0]->hash_table); + kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]); + journal->j_revoke = NULL; + return -ENOMEM; + } + + for (tmp = 0; tmp < hash_size; tmp++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + + spin_lock_init(&journal->j_revoke_lock); + return 0; } @@ -231,7 +270,20 @@ void journal_destroy_revoke(journal_t *j struct list_head *hash_list; int i; - table = journal->j_revoke; + table = journal->j_revoke_table[0]; + if (!table) + return; + + for (i=0; i<table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + 
kmem_cache_free(revoke_table_cache, table); + journal->j_revoke = NULL; + + table = journal->j_revoke_table[1]; if (!table) return; @@ -337,11 +389,9 @@ int journal_revoke(handle_t *handle, uns } } - lock_journal(journal); jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); - unlock_journal(journal); BUFFER_TRACE(bh_in, "exit"); return err; } @@ -389,7 +439,9 @@ int journal_cancel_revoke(handle_t *hand if (record) { jbd_debug(4, "cancelled existing revoke on " "blocknr %llu\n", (u64)bh->b_blocknr); + spin_lock(&journal->j_revoke_lock); list_del(&record->hash); + spin_unlock(&journal->j_revoke_lock); kmem_cache_free(revoke_record_cache, record); did_revoke = 1; } @@ -418,6 +470,22 @@ int journal_cancel_revoke(handle_t *hand return did_revoke; } +/* journal_switch_revoke_table selects j_revoke for the next transaction; + * we do not want to suspend any processing until all revokes are + * written -bzzz + */ +void journal_switch_revoke_table(journal_t *journal) +{ + int i; + + if (journal->j_revoke == journal->j_revoke_table[0]) + journal->j_revoke = journal->j_revoke_table[1]; + else + journal->j_revoke = journal->j_revoke_table[0]; + + for (i = 0; i < journal->j_revoke->hash_size; i++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); +} /* * Write revoke records to the journal for all entries in the current @@ -438,7 +506,10 @@ void journal_write_revoke_records(journa descriptor = NULL; offset = 0; count = 0; - revoke = journal->j_revoke; + + /* select revoke table for committing transaction */ + revoke = journal->j_revoke == journal->j_revoke_table[0] ? 
+ journal->j_revoke_table[1] : journal->j_revoke_table[0]; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; diff -puN fs/jbd/commit.c~jbd-240-dual-revoke-tables fs/jbd/commit.c --- 25/fs/jbd/commit.c~jbd-240-dual-revoke-tables Thu Jun 5 15:14:28 2003 +++ 25-akpm/fs/jbd/commit.c Thu Jun 5 15:14:28 2003 @@ -149,14 +149,10 @@ void journal_commit_transaction(journal_ jbd_debug (3, "JBD: commit phase 1\n"); - journal_write_revoke_records(journal, commit_transaction); - /* - * Now that we have built the revoke records, we can start - * reusing the revoke list for a new running transaction. We - * can now safely start committing the old transaction: time to - * get a new running transaction for incoming filesystem updates + * Switch to a new revoke table. */ + journal_switch_revoke_table(journal); spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_FLUSH; @@ -283,6 +279,10 @@ write_out_data_locked: sync_datalist_empty: spin_unlock(&journal->j_list_lock); + journal_write_revoke_records(journal, commit_transaction); + + jbd_debug(3, "JBD: commit phase 2\n"); + /* * If we found any dirty or locked buffers, then we should have * looped back up to the write_out_data label. If there weren't diff -puN include/linux/jbd.h~jbd-240-dual-revoke-tables include/linux/jbd.h --- 25/include/linux/jbd.h~jbd-240-dual-revoke-tables Thu Jun 5 15:14:28 2003 +++ 25-akpm/include/linux/jbd.h Thu Jun 5 15:14:28 2003 @@ -813,6 +813,7 @@ struct journal_s */ spinlock_t j_revoke_lock; struct jbd_revoke_table_s *j_revoke; + struct jbd_revoke_table_s *j_revoke_table[2]; /* * An opaque pointer to fs-private information. 
ext3 puts its @@ -995,10 +996,11 @@ extern int journal_cancel_revoke(hand extern void journal_write_revoke_records(journal_t *, transaction_t *); /* Recovery revoke support */ -extern int journal_set_revoke(journal_t *, unsigned long, tid_t); -extern int journal_test_revoke(journal_t *, unsigned long, tid_t); -extern void journal_clear_revoke(journal_t *); -extern void journal_brelse_array(struct buffer_head *b[], int n); +extern int journal_set_revoke(journal_t *, unsigned long, tid_t); +extern int journal_test_revoke(journal_t *, unsigned long, tid_t); +extern void journal_clear_revoke(journal_t *); +extern void journal_brelse_array(struct buffer_head *b[], int n); +extern void journal_switch_revoke_table(journal_t *journal); /* * The log thread user interface: _