From: Hua Zhong Hi Andrew/Stephen: The current ext3 totally ignores I/O errors that happen during journal_force_commit, causing user space to falsely believe the commit succeeded when it actually did not. This patch checks for I/O errors during journal_commit_transaction and aborts the journal when an I/O error occurs. Originally I thought about reporting the error without aborting the journal, but that would probably need a new flag. Aborting the journal seems to be the easy way to signal "hey, something is wrong". fs/ext3/fsync.c | 3 +-- fs/jbd/commit.c | 21 +++++++++++++++++++-- fs/jbd/journal.c | 12 +++++++++++- fs/jbd/transaction.c | 6 +++--- include/linux/jbd.h | 2 +- 5 files changed, 35 insertions(+), 9 deletions(-) diff -puN fs/ext3/fsync.c~jbd-handle-journal-io-errors fs/ext3/fsync.c --- 25/fs/ext3/fsync.c~jbd-handle-journal-io-errors 2003-03-27 01:15:47.000000000 -0800 +++ 25-akpm/fs/ext3/fsync.c 2003-03-27 01:15:47.000000000 -0800 @@ -72,6 +72,5 @@ int ext3_sync_file(struct file * file, s * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - ext3_force_commit(inode->i_sb); - return 0; + return ext3_force_commit(inode->i_sb); } diff -puN fs/jbd/commit.c~jbd-handle-journal-io-errors fs/jbd/commit.c --- 25/fs/jbd/commit.c~jbd-handle-journal-io-errors 2003-03-27 01:15:47.000000000 -0800 +++ 25-akpm/fs/jbd/commit.c 2003-03-27 01:15:47.000000000 -0800 @@ -118,7 +118,7 @@ void journal_commit_transaction(journal_ #endif lock_kernel(); - + J_ASSERT (journal->j_running_transaction != NULL); J_ASSERT (journal->j_committing_transaction == NULL); @@ -218,6 +218,7 @@ void journal_commit_transaction(journal_ * on the transaction lists. Data blocks go first. 
*/ + err = 0; /* * Whenever we unlock the journal and sleep, things can get added * onto ->t_datalist, so we have to keep looping back to write_out_data @@ -299,6 +300,8 @@ write_out_data_locked: spin_unlock(&journal_datalist_lock); unlock_journal(journal); wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; /* the journal_head may have been removed now */ lock_journal(journal); goto write_out_data; @@ -500,6 +503,10 @@ start_journal_io: if (buffer_locked(bh)) { unlock_journal(journal); wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; lock_journal(journal); goto wait_for_iobuf; } @@ -561,6 +568,8 @@ start_journal_io: if (buffer_locked(bh)) { unlock_journal(journal); wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; lock_journal(journal); goto wait_for_ctlbuf; } @@ -608,7 +617,9 @@ start_journal_io: struct buffer_head *bh = jh2bh(descriptor); set_buffer_uptodate(bh); sync_dirty_buffer(bh); - __brelse(bh); /* One for getblk() */ + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + put_bh(bh); /* One for getblk() */ journal_unlock_journal_head(descriptor); } @@ -619,6 +630,12 @@ start_journal_io: skip_commit: /* The journal should be unlocked by now. */ + if (err) { + lock_journal(journal); + __journal_abort_hard(journal); + unlock_journal(journal); + } + /* Call any callbacks that had been registered for handles in this * transaction. It is up to the callback to free any allocated * memory. diff -puN fs/jbd/journal.c~jbd-handle-journal-io-errors fs/jbd/journal.c --- 25/fs/jbd/journal.c~jbd-handle-journal-io-errors 2003-03-27 01:15:47.000000000 -0800 +++ 25-akpm/fs/jbd/journal.c 2003-03-27 01:15:47.000000000 -0800 @@ -580,8 +580,9 @@ out: * Wait for a specified commit to complete. * The caller may not hold the journal lock. 
*/ -void log_wait_commit (journal_t *journal, tid_t tid) +int log_wait_commit (journal_t *journal, tid_t tid) { + int err; lock_kernel(); #ifdef CONFIG_JBD_DEBUG lock_journal(journal); @@ -598,7 +599,16 @@ void log_wait_commit (journal_t *journal wake_up(&journal->j_wait_commit); sleep_on(&journal->j_wait_done_commit); } + + if (unlikely(is_journal_aborted(journal))) { + printk(KERN_EMERG "journal commit I/O error\n"); + err = -EIO; + } else { + err = 0; + } + unlock_kernel(); + return err; } /* diff -puN fs/jbd/transaction.c~jbd-handle-journal-io-errors fs/jbd/transaction.c --- 25/fs/jbd/transaction.c~jbd-handle-journal-io-errors 2003-03-27 01:15:47.000000000 -0800 +++ 25-akpm/fs/jbd/transaction.c 2003-03-27 01:15:47.000000000 -0800 @@ -1401,7 +1401,7 @@ int journal_stop(handle_t *handle) * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) - log_wait_commit(journal, tid); + err = log_wait_commit(journal, tid); } jbd_free_handle(handle); return err; @@ -1417,7 +1417,7 @@ int journal_stop(handle_t *handle) int journal_force_commit(journal_t *journal) { handle_t *handle; - int ret = 0; + int ret; lock_kernel(); handle = journal_start(journal, 1); @@ -1426,7 +1426,7 @@ int journal_force_commit(journal_t *jour goto out; } handle->h_sync = 1; - journal_stop(handle); + ret = journal_stop(handle); out: unlock_kernel(); return ret; diff -puN include/linux/jbd.h~jbd-handle-journal-io-errors include/linux/jbd.h --- 25/include/linux/jbd.h~jbd-handle-journal-io-errors 2003-03-27 01:15:47.000000000 -0800 +++ 25-akpm/include/linux/jbd.h 2003-03-27 01:15:47.000000000 -0800 @@ -855,7 +855,7 @@ extern void journal_brelse_array(stru extern int log_space_left (journal_t *); /* Called with journal locked */ extern tid_t log_start_commit (journal_t *, transaction_t *); -extern void log_wait_commit (journal_t *, tid_t); +extern int log_wait_commit (journal_t *, tid_t); extern int log_do_checkpoint (journal_t *, int); extern void 
log_wait_for_space(journal_t *, int nblocks); _