From: Chris Mason Add reiserfs support for flush barriers, mount with -o barrier=flush to enable them. Barriers are triggered on fsync and for log commits. DESC reiserfs-v3-barrier-support-tweak EDESC Signed-off-by: Andrew Morton --- 25-akpm/fs/reiserfs/file.c | 9 ++- 25-akpm/fs/reiserfs/journal.c | 81 ++++++++++++++++++++++++++++----- 25-akpm/fs/reiserfs/super.c | 31 ++++++++++++ 25-akpm/include/linux/reiserfs_fs.h | 3 - 25-akpm/include/linux/reiserfs_fs_sb.h | 4 + 5 files changed, 113 insertions(+), 15 deletions(-) diff -puN fs/reiserfs/file.c~reiserfs-v3-barrier-support fs/reiserfs/file.c --- 25/fs/reiserfs/file.c~reiserfs-v3-barrier-support 2004-07-13 13:16:20.329465368 -0700 +++ 25-akpm/fs/reiserfs/file.c 2004-07-13 13:16:20.342463392 -0700 @@ -89,15 +89,16 @@ static int reiserfs_sync_file( ) { struct inode * p_s_inode = p_s_dentry->d_inode; int n_err; - - reiserfs_write_lock(p_s_inode->i_sb); + int barrier_done; if (!S_ISREG(p_s_inode->i_mode)) BUG (); - n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; - reiserfs_commit_for_inode(p_s_inode) ; + reiserfs_write_lock(p_s_inode->i_sb); + barrier_done = reiserfs_commit_for_inode(p_s_inode); reiserfs_write_unlock(p_s_inode->i_sb); + if (barrier_done != 1) + blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); return ( n_err < 0 ) ? -EIO : 0; } diff -puN fs/reiserfs/journal.c~reiserfs-v3-barrier-support fs/reiserfs/journal.c --- 25/fs/reiserfs/journal.c~reiserfs-v3-barrier-support 2004-07-13 13:16:20.331465064 -0700 +++ 25-akpm/fs/reiserfs/journal.c 2004-07-13 13:16:20.346462784 -0700 @@ -127,6 +127,12 @@ static int reiserfs_clean_and_file_buffe return 0 ; } +static void disable_barrier(struct super_block *s) +{ + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); + printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s)); +} + static struct reiserfs_bitmap_node * allocate_bitmap_node(struct super_block *p_s_sb) { struct reiserfs_bitmap_node *bn ; @@ -640,6 +646,15 @@ static void submit_ordered_buffer(struct submit_bh(WRITE, bh) ; } +static int submit_barrier_buffer(struct buffer_head *bh) { + get_bh(bh) ; + bh->b_end_io = reiserfs_end_ordered_io; + clear_buffer_dirty(bh) ; + if (!buffer_uptodate(bh)) + BUG(); + return submit_bh(WRITE_BARRIER, bh) ; +} + #define CHUNK_SIZE 32 struct buffer_chunk { struct buffer_head *bh[CHUNK_SIZE]; @@ -909,6 +924,7 @@ static int flush_commit_list(struct supe int bn ; struct buffer_head *tbh = NULL ; unsigned long trans_id = jl->j_trans_id; + int barrier = 0; reiserfs_check_lock_depth(s, "flush_commit_list") ; @@ -973,7 +989,20 @@ static int flush_commit_list(struct supe } atomic_dec(&SB_JOURNAL(s)->j_async_throttle); - /* wait on everything written so far before writing the commit */ + /* wait on everything written so far before writing the commit + * if we are in barrier mode, send the commit down now + */ + barrier = reiserfs_barrier_flush(s); + if (barrier) { + int ret; + lock_buffer(jl->j_commit_bh); + ret = submit_barrier_buffer(jl->j_commit_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(jl->j_commit_bh); + disable_barrier(s); + barrier = 0; + } + } for (i = 0 ; i < (jl->j_len + 1) ; i++) { bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ; @@ -995,10 +1024,14 @@ static int flush_commit_list(struct supe if (atomic_read(&(jl->j_commit_left)) != 1) BUG(); - if (buffer_dirty(jl->j_commit_bh)) - BUG(); - mark_buffer_dirty(jl->j_commit_bh) ; - sync_dirty_buffer(jl->j_commit_bh) ; + if (!barrier) { + if (buffer_dirty(jl->j_commit_bh)) + BUG(); + mark_buffer_dirty(jl->j_commit_bh) ; + sync_dirty_buffer(jl->j_commit_bh) ; + } else + wait_on_buffer(jl->j_commit_bh); + if (!buffer_uptodate(jl->j_commit_bh)) { reiserfs_panic(s, "journal-615: buffer write failed\n") ; } @@ -1098,8 +1131,22 @@ static int _update_journal_header_block( jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; jh->j_first_unflushed_offset = cpu_to_le32(offset) ; jh->j_mount_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_mount_id) ; - set_buffer_dirty(SB_JOURNAL(p_s_sb)->j_header_bh) ; - sync_dirty_buffer(SB_JOURNAL(p_s_sb)->j_header_bh) ; + + if (reiserfs_barrier_flush(p_s_sb)) { + int ret; + lock_buffer(SB_JOURNAL(p_s_sb)->j_header_bh); + ret = submit_barrier_buffer(SB_JOURNAL(p_s_sb)->j_header_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh); + disable_barrier(p_s_sb); + goto sync; + } + wait_on_buffer(SB_JOURNAL(p_s_sb)->j_header_bh); + } else { +sync: + set_buffer_dirty(SB_JOURNAL(p_s_sb)->j_header_bh) ; + sync_dirty_buffer(SB_JOURNAL(p_s_sb)->j_header_bh) ; + } if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { reiserfs_warning (p_s_sb, "journal-837: IO error during journal replay"); return -EIO ; @@ -3184,11 +3231,16 @@ void reiserfs_update_inode_transaction(s REISERFS_I(inode)->i_trans_id = SB_JOURNAL(inode->i_sb)->j_trans_id ; } -static void __commit_trans_jl(struct inode *inode, unsigned long id, +/* + * returns -1 on error, 0 if no commits/barriers were done and 1 + * if a transaction was actually committed and the barrier was done + */ +static int __commit_trans_jl(struct inode *inode, unsigned long id, struct reiserfs_journal_list *jl) { struct reiserfs_transaction_handle th ; struct super_block *sb = inode->i_sb ; + int ret = 0; /* is it from the current transaction, or from an unknown transaction? */ if (id == SB_JOURNAL(sb)->j_trans_id) { @@ -3210,6 +3262,7 @@ static void __commit_trans_jl(struct ino } journal_end_sync(&th, sb, 1) ; + ret = 1; } else { /* this gets tricky, we have to make sure the journal list in @@ -3218,13 +3271,21 @@ static void __commit_trans_jl(struct ino */ flush_commit_only: if (journal_list_still_alive(inode->i_sb, id)) { + /* + * we only set ret to 1 when we know for sure + * the barrier hasn't been started yet on the commit + * block. + */ + if (atomic_read(&jl->j_commit_left) > 1) + ret = 1; flush_commit_list(sb, jl, 1) ; } } /* otherwise the list is gone, and long since committed */ + return ret; } -void reiserfs_commit_for_inode(struct inode *inode) { +int reiserfs_commit_for_inode(struct inode *inode) { unsigned long id = REISERFS_I(inode)->i_trans_id; struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; @@ -3237,7 +3298,7 @@ void reiserfs_commit_for_inode(struct in /* jl will be updated in __commit_trans_jl */ } - __commit_trans_jl(inode, id, jl); + return __commit_trans_jl(inode, id, jl); } void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, diff -puN fs/reiserfs/super.c~reiserfs-v3-barrier-support fs/reiserfs/super.c --- 25/fs/reiserfs/super.c~reiserfs-v3-barrier-support 2004-07-13 13:16:20.333464760 -0700 +++ 25-akpm/fs/reiserfs/super.c 2004-07-13 13:17:14.181278648 -0700 @@ -549,6 +549,13 @@ static const arg_desc_t logging_mode[] = {NULL, 0} }; +/* possible values for -o barrier= */ +static const arg_desc_t barrier_mode[] = { + {"none", 1<s_mount_opt &= ~all_barrier; + if (bits & flush) { + REISERFS_SB(s)->s_mount_opt |= flush; + printk("reiserfs: enabling write barrier flush mode\n"); + } else if (bits & none) { + REISERFS_SB(s)->s_mount_opt |= none; + printk("reiserfs: write barriers turned off\n"); + } + } +} + static void handle_attrs( struct super_block *s ) { struct reiserfs_super_block * rs; @@ -854,6 +879,8 @@ static int reiserfs_remount (struct supe safe_mask |= 1 << REISERFS_ATTRS; safe_mask |= 1 << REISERFS_XATTRS_USER; safe_mask |= 1 << REISERFS_POSIXACL; + safe_mask |= 1 << REISERFS_BARRIER_FLUSH; + safe_mask |= 1 << REISERFS_BARRIER_NONE; /* Update the bitmask, taking care to keep * the bits we're not allowed to change here */ @@ -900,6 +927,7 @@ static int reiserfs_remount (struct supe } handle_data_mode(s, mount_options); + handle_barrier_mode(s, mount_options); REISERFS_SB(s)->s_mount_state = sb_umount_state(rs) ; s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ journal_begin(&th, s, 10) ; @@ -1413,6 +1441,9 @@ static int reiserfs_fill_super (struct s } else { reiserfs_info (s, "using writeback data mode\n"); } + if (reiserfs_barrier_flush(s)) { + printk("reiserfs: using flush barriers\n"); + } // set_device_ro(s->s_dev, 1) ; if( journal_init(s, jdev_name, old_format, commit_max_age) ) { diff -puN include/linux/reiserfs_fs.h~reiserfs-v3-barrier-support include/linux/reiserfs_fs.h --- 25/include/linux/reiserfs_fs.h~reiserfs-v3-barrier-support 2004-07-13 13:16:20.336464304 -0700 +++ 25-akpm/include/linux/reiserfs_fs.h 2004-07-13 13:16:20.351462024 -0700 @@ -1777,7 +1777,8 @@ int reiserfs_end_persistent_transaction( int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); int reiserfs_flush_old_commits(struct super_block *); -void reiserfs_commit_for_inode(struct inode *) ; +int reiserfs_commit_for_inode(struct inode *) ; +int reiserfs_inode_needs_commit(struct inode *) ; void reiserfs_update_inode_transaction(struct inode *) ; void reiserfs_wait_on_write_block(struct super_block *s) ; void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ; diff -puN include/linux/reiserfs_fs_sb.h~reiserfs-v3-barrier-support include/linux/reiserfs_fs_sb.h --- 25/include/linux/reiserfs_fs_sb.h~reiserfs-v3-barrier-support 2004-07-13 13:16:20.337464152 -0700 +++ 25-akpm/include/linux/reiserfs_fs_sb.h 2004-07-13 13:16:20.352461872 -0700 @@ -444,6 +444,8 @@ enum reiserfs_mount_options { REISERFS_XATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, + REISERFS_BARRIER_NONE, + REISERFS_BARRIER_FLUSH, REISERFS_TEST1, REISERFS_TEST2, @@ -473,6 +475,8 @@ enum reiserfs_mount_options { #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) +#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) +#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) void reiserfs_file_buffer (struct buffer_head * bh, int list); extern struct file_system_type reiserfs_fs_type; _