 25-akpm/fs/ext3/balloc.c         |  118 +++++++++++++++++++++++++++------------
 25-akpm/fs/jbd/journal.c         |    2 
 25-akpm/fs/jbd/transaction.c     |    2 
 25-akpm/include/linux/ext3_jbd.h |    6 +
 include/linux/jbd.h              |    0 
 5 files changed, 88 insertions(+), 40 deletions(-)

diff -puN fs/ext3/balloc.c~ext3-concurrent-block-allocation-fix-1 fs/ext3/balloc.c
--- 25/fs/ext3/balloc.c~ext3-concurrent-block-allocation-fix-1	Tue Apr  8 14:06:34 2003
+++ 25-akpm/fs/ext3/balloc.c	Tue Apr  8 14:06:34 2003
@@ -201,16 +201,6 @@ do_more:
 		}
 	}
 #endif
-		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext3_clear_bit_atomic (&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock,
-					bit + i, bitmap_bh->b_data)) {
-			ext3_error (sb, __FUNCTION__,
-				    "bit already cleared for block %lu",
-				    block + i);
-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else
-			dquot_freed_blocks++;
-
 		/* @@@ This prevents newly-allocated data from being
 		 * freed and then reallocated within the same
 		 * transaction.
@@ -229,17 +219,34 @@ do_more:
 		 * activity on the buffer any more and so it is safe to
 		 * reallocate it.
 		 */
-		BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
+		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
 		J_ASSERT_BH(bitmap_bh,
 				bh2jh(bitmap_bh)->b_committed_data != NULL);
-		ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
-				bit + i, bh2jh(bitmap_bh)->b_committed_data);
+		ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
+				bh2jh(bitmap_bh)->b_committed_data);
+
+		/*
+		 * We clear the bit in the bitmap after setting the committed
+		 * data bit, because this is the reverse order to that which
+		 * the allocator uses.
+		 */
+		BUFFER_TRACE(bitmap_bh, "clear bit");
+		if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+					bit + i, bitmap_bh->b_data)) {
+			ext3_error (sb, __FUNCTION__,
+				"bit already cleared for block %lu",
+				block + i);
+			BUFFER_TRACE(bitmap_bh, "bit already cleared");
+		} else {
+			dquot_freed_blocks++;
+		}
 	}
 
-	spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+	spin_lock(bg_lock(sb, block_group));
 	gdp->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + dquot_freed_blocks);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
+			dquot_freed_blocks);
+	spin_unlock(bg_lock(sb, block_group));
 
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -358,11 +365,37 @@ static int find_next_usable_block(int st
 	return -1;
 }
 
+/*
+ * We think we can allocate this block in this bitmap.  Try to set the bit.
+ * If that succeeds then check that nobody has allocated and then freed the
+ * block since we saw that it was not marked in b_committed_data.  If it _was_
+ * allocated and freed then clear the bit in the bitmap again and return
+ * zero (failure).
+ */
+static inline int
+claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
+{
+	if (ext3_set_bit_atomic(lock, block, bh->b_data))
+		return 0;
+	if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data &&
+			ext3_test_bit(block, bh2jh(bh)->b_committed_data)) {
+		ext3_clear_bit_atomic(lock, block, bh->b_data);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * If we failed to allocate the desired block then we may end up crossing to a
+ * new bitmap.  In that case we must release write access to the old one via
+ * ext3_journal_release_buffer(), else we'll run out of credits.
+ */
 static int
 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
 		struct buffer_head *bitmap_bh, int goal, int *errp)
 {
 	int i, fatal = 0;
+	int have_access = 0;
 
 	*errp = 0;
 
@@ -372,31 +405,38 @@ ext3_try_to_allocate(struct super_block
 repeat:
 		goal = find_next_usable_block(goal, bitmap_bh,
 					EXT3_BLOCKS_PER_GROUP(sb));
-		if (goal < 0)
-			return -1;
+		if (goal < 0)
+			goto fail;
 
 		for (i = 0; i < 7 && goal > 0 &&
 				ext3_test_allocatable(goal - 1, bitmap_bh);
 			i++, goal--);
 
got:
-	/* Make sure we use undo access for the bitmap, because it is
-	 * critical that we do the frozen_data COW on bitmap buffers in
-	 * all cases even if the buffer is in BJ_Forget state in the
-	 * committing transaction. */
-	BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
-	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
-	if (fatal) {
-		*errp = fatal;
-		return -1;
+	if (!have_access) {
+		/*
+		 * Make sure we use undo access for the bitmap, because it is
+		 * critical that we do the frozen_data COW on bitmap buffers in
+		 * all cases even if the buffer is in BJ_Forget state in the
+		 * committing transaction.
+		 */
+		BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+		fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+		if (fatal) {
+			*errp = fatal;
+			goto fail;
+		}
+		have_access = 1;
 	}
 
-	if (ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
-				goal, bitmap_bh->b_data)) {
-		/* already allocated by concurrent thread -bzzz */
+	if (!claim_block(bg_lock(sb, group), goal, bitmap_bh)) {
+		/*
+		 * The block was allocated by another thread, or it was
+		 * allocated and then freed by another thread
+		 */
 		goal++;
 		if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
-			return -1;
+			goto fail;
 		goto repeat;
 	}
 
@@ -404,10 +444,16 @@ got:
 	fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (fatal) {
 		*errp = fatal;
-		return -1;
+		goto fail;
 	}
 	return goal;
+fail:
+	if (have_access) {
+		BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+		ext3_journal_release_buffer(handle, bitmap_bh);
+	}
+	return -1;
 }
 
@@ -566,13 +612,13 @@ allocated:
 		}
 	}
 #endif
-	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_lock(bg_lock(sb, group_no));
 	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
 		J_ASSERT_BH(bitmap_bh,
 			!ext3_test_bit(ret_block,
 					bh2jh(bitmap_bh)->b_committed_data));
 	ext3_debug("found bit %d\n", ret_block);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_unlock(bg_lock(sb, group_no));
 
 	/* ret_block was blockgroup-relative.  Now it becomes fs-relative */
 	ret_block = target_block;
@@ -593,10 +639,10 @@ allocated:
 	ext3_debug("allocating block %d. Goal hits %d of %d.\n",
 			ret_block, goal_hits, goal_attempts);
 
-	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_lock(bg_lock(sb, group_no));
 	gdp->bg_free_blocks_count =
 		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_unlock(bg_lock(sb, group_no));
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext3_journal_dirty_metadata(handle, gdp_bh);
diff -puN fs/jbd/transaction.c~ext3-concurrent-block-allocation-fix-1 fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~ext3-concurrent-block-allocation-fix-1	Tue Apr  8 14:06:34 2003
+++ 25-akpm/fs/jbd/transaction.c	Tue Apr  8 14:06:34 2003
@@ -1121,7 +1121,6 @@ out:
 	return 0;
 }
 
-#if 0
 /*
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
@@ -1155,7 +1154,6 @@ void journal_release_buffer (handle_t *h
 	JBUFFER_TRACE(jh, "exit");
 	unlock_journal(journal);
 }
-#endif
 
 /**
  * void journal_forget() - bforget() for potentially-journaled buffers.
diff -puN include/linux/jbd.h~ext3-concurrent-block-allocation-fix-1 include/linux/jbd.h
diff -puN include/linux/ext3_jbd.h~ext3-concurrent-block-allocation-fix-1 include/linux/ext3_jbd.h
--- 25/include/linux/ext3_jbd.h~ext3-concurrent-block-allocation-fix-1	Tue Apr  8 14:06:34 2003
+++ 25-akpm/include/linux/ext3_jbd.h	Tue Apr  8 14:06:34 2003
@@ -117,6 +117,12 @@ __ext3_journal_get_write_access(const ch
 }
 
 static inline void
+ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	journal_release_buffer(handle, bh);
+}
+
+static inline void
 ext3_journal_forget(handle_t *handle, struct buffer_head *bh)
 {
 	journal_forget(handle, bh);
diff -puN fs/jbd/journal.c~ext3-concurrent-block-allocation-fix-1 fs/jbd/journal.c
--- 25/fs/jbd/journal.c~ext3-concurrent-block-allocation-fix-1	Tue Apr  8 14:06:47 2003
+++ 25-akpm/fs/jbd/journal.c	Tue Apr  8 14:06:51 2003
@@ -48,9 +48,7 @@ EXPORT_SYMBOL(journal_get_create_access)
 EXPORT_SYMBOL(journal_get_undo_access);
 EXPORT_SYMBOL(journal_dirty_data);
 EXPORT_SYMBOL(journal_dirty_metadata);
-#if 0
 EXPORT_SYMBOL(journal_release_buffer);
-#endif
 EXPORT_SYMBOL(journal_forget);
 #if 0
 EXPORT_SYMBOL(journal_sync_buffer);
_
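
A note for readers following the locking change from outside the tree:
claim_block() is only safe because the free path writes the two bitmaps in
the reverse of the order in which the allocator reads them, i.e. it sets the
bit in b_committed_data first and clears the bit in the block bitmap second,
as the in-diff comment spells out.  Below is a minimal, self-contained
userspace model of that invariant, offered as a sketch only: the names
bitmap, committed, try_claim() and free_block() are invented for
illustration, and C11 atomics stand in for the per-group spinlock that
ext3_set_bit_atomic()/ext3_clear_bit_atomic() take.

#include <stdio.h>
#include <stdatomic.h>

/* One word models a block bitmap; bit n stands for block n. */
static atomic_ulong bitmap;	/* current allocation state */
static atomic_ulong committed;	/* models bh2jh(bh)->b_committed_data */

/* Atomically set bit n, returning its old value. */
static int test_and_set(atomic_ulong *map, int n)
{
	return (atomic_fetch_or(map, 1UL << n) >> n) & 1;
}

static void clear_bit_n(atomic_ulong *map, int n)
{
	atomic_fetch_and(map, ~(1UL << n));
}

static int test_bit_n(atomic_ulong *map, int n)
{
	return (atomic_load(map) >> n) & 1;
}

/* Allocator side, mirroring claim_block(): claim first, then check
 * whether the block was freed in the still-committing transaction. */
static int try_claim(int n)
{
	if (test_and_set(&bitmap, n))
		return 0;	/* lost the race to another allocator */
	if (test_bit_n(&committed, n)) {
		clear_bit_n(&bitmap, n);	/* back out: not reusable yet */
		return 0;
	}
	return 1;
}

/* Freeing side: set the committed bit *before* clearing the bitmap bit,
 * the reverse of the order try_claim() reads them, so a racing claimer
 * can never see the block free in the bitmap while missing its record
 * in the committed state. */
static void free_block(int n)
{
	test_and_set(&committed, n);
	clear_bit_n(&bitmap, n);
}

int main(void)
{
	test_and_set(&bitmap, 3);	/* block 3 allocated in this transaction */
	free_block(3);			/* ...and freed again before commit */
	printf("claim block 3 -> %d (expect 0: freed but not yet committed)\n",
			try_claim(3));
	printf("claim block 5 -> %d (expect 1: genuinely free)\n",
			try_claim(5));
	return 0;
}

As in claim_block(), a claimer that wins the set-bit race but then finds the
committed bit set must back out and fail; this is the same rule as the "@@@"
comment in the first hunk, which prevents a block freed in a not-yet-committed
transaction from being handed out again before that free is durable.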