This is a forward-port from 2.4. The (old) 2.5 code forces a commit on every synchronous (IS_SYNC or O_SYNC) write by artificially dirtying the inode before calling generic_file_aio_write(). But generic_file_aio_write() already calls generic_osync_inode() for us on O_SYNC writes, which syncs the file data, so stop dirtying the inode and instead force a journal commit only when it is actually needed: for O_SYNC writes to data-journalled files, and for IS_SYNC inodes. ext3/file.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 46 insertions(+), 14 deletions(-) diff -puN fs/ext3/file.c~ext3-O_SYNC-speedup fs/ext3/file.c --- 25/fs/ext3/file.c~ext3-O_SYNC-speedup 2003-02-23 16:00:58.000000000 -0800 +++ 25-akpm/fs/ext3/file.c 2003-02-23 16:13:10.000000000 -0800 @@ -55,29 +55,61 @@ static int ext3_open_file (struct inode return 0; } -/* - * ext3_file_write(). - * - * Most things are done in ext3_prepare_write() and ext3_commit_write(). - */ - static ssize_t ext3_file_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; + int ret, err; + + ret = generic_file_aio_write(iocb, buf, count, pos); /* - * Nasty: if the file is subject to synchronous writes then we need - * to force generic_osync_inode() to call ext3_write_inode(). - * We do that by marking the inode dirty. This adds much more - * computational expense than we need, but we're going to sync - * anyway. + * Skip flushing if there was an error, or if nothing was written. */ - if (IS_SYNC(inode) || (file->f_flags & O_SYNC)) - mark_inode_dirty(inode); + if (ret <= 0) + return ret; + + /* + * If the inode is IS_SYNC, or is O_SYNC and we are doing data + * journalling then we need to make sure that we force the transaction + * to disk to keep all metadata uptodate synchronously. + */ + if (file->f_flags & O_SYNC) { + /* + * If we are non-data-journaled, then the dirty data has + * already been flushed to backing store by generic_osync_inode, + * and the inode has been flushed too if there have been any + * modifications other than mere timestamp updates. 
+ * + * Open question --- do we care about flushing timestamps too + * if the inode is IS_SYNC? + */ + if (!ext3_should_journal_data(inode)) + return ret; + + goto force_commit; + } - return generic_file_aio_write(iocb, buf, count, pos); + /* + * So we know that there has been no forced data flush. If the inode + * is marked IS_SYNC, we need to force one ourselves. + */ + if (!IS_SYNC(inode)) + return ret; + + /* + * Open question #2 --- should we force data to disk here too? If we + * don't, the only impact is that data=writeback filesystems won't + * flush data to disk automatically on IS_SYNC, only metadata (but + * historically, that is what ext2 has done.) + */ + +force_commit: + err = ext3_force_commit(inode->i_sb); + if (err) + return err; + return ret; } struct file_operations ext3_file_operations = { _