diff -urNp 2.4.19rc1/fs/buffer.c z/fs/buffer.c --- 2.4.19rc1/fs/buffer.c Fri Jul 5 12:20:47 2002 +++ z/fs/buffer.c Wed Jul 10 20:52:41 2002 @@ -587,20 +587,20 @@ struct buffer_head * get_hash_table(kdev void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode) { spin_lock(&lru_list_lock); - if (bh->b_inode) - list_del(&bh->b_inode_buffers); - bh->b_inode = inode; - list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + if (!bh->b_inode) { + bh->b_inode = inode; + list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers); + } spin_unlock(&lru_list_lock); } void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode) { spin_lock(&lru_list_lock); - if (bh->b_inode) - list_del(&bh->b_inode_buffers); - bh->b_inode = inode; - list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); + if (!bh->b_inode) { + bh->b_inode = inode; + list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers); + } spin_unlock(&lru_list_lock); } @@ -819,37 +819,40 @@ inline void set_buffer_async_io(struct b * forever if somebody is actively writing to the file. * * Do this in two main stages: first we copy dirty buffers to a - * temporary inode list, queueing the writes as we go. Then we clean + * temporary list, queueing the writes as we go. Then we clean * up, waiting for those writes to complete. * * During this second stage, any subsequent updates to the file may end - * up refiling the buffer on the original inode's dirty list again, so - * there is a chance we will end up with a buffer queued for write but - * not yet completed on that list. So, as a final cleanup we go through - * the osync code to catch these locked, dirty buffers without requeuing - * any newly dirty buffers for write. + * up marking some of our private bh dirty, so we must refile them + * into the original inode's dirty list again during the second stage. 
*/ int fsync_buffers_list(struct list_head *list) { struct buffer_head *bh; - struct inode tmp; - int err = 0, err2; - - INIT_LIST_HEAD(&tmp.i_dirty_buffers); + int err = 0; + LIST_HEAD(tmp); spin_lock(&lru_list_lock); while (!list_empty(list)) { - bh = BH_ENTRY(list->next); + bh = BH_ENTRY(list->prev); list_del(&bh->b_inode_buffers); if (!buffer_dirty(bh) && !buffer_locked(bh)) bh->b_inode = NULL; else { - bh->b_inode = &tmp; - list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers); + list_add(&bh->b_inode_buffers, &tmp); if (buffer_dirty(bh)) { get_bh(bh); spin_unlock(&lru_list_lock); + /* + * Wait for I/O completion before submitting + * the buffer, to be sure the write will + * be effective on the latest data in + * the buffer. (otherwise - if there's old + * I/O in flight - write_buffer would become + * a noop) + */ + wait_on_buffer(bh); ll_rw_block(WRITE, 1, &bh); brelse(bh); spin_lock(&lru_list_lock); @@ -857,9 +860,20 @@ int fsync_buffers_list(struct list_head } } - while (!list_empty(&tmp.i_dirty_buffers)) { - bh = BH_ENTRY(tmp.i_dirty_buffers.prev); - remove_inode_queue(bh); + while (!list_empty(&tmp)) { + bh = BH_ENTRY(tmp.prev); + list_del(&bh->b_inode_buffers); + /* + * If the buffer has been made dirty again + * during the fsync (for example from a ->writepage + * that doesn't take the i_sem), just make sure not + * to lose track of it: put the buffer back into + * its inode queue. + */ + if (!buffer_dirty(bh)) + bh->b_inode = NULL; + else + list_add(&bh->b_inode_buffers, &bh->b_inode->i_dirty_buffers); get_bh(bh); spin_unlock(&lru_list_lock); wait_on_buffer(bh); @@ -870,12 +884,8 @@ int fsync_buffers_list(struct list_head } spin_unlock(&lru_list_lock); - err2 = osync_buffers_list(list); - if (err) - return err; - else - return err2; + return err; } /* @@ -887,6 +897,10 @@ int fsync_buffers_list(struct list_head * you dirty the buffers, and then use osync_buffers_list to wait for * completion. 
Any other dirty buffers which are not yet queued for * write will not be flushed to disk by the osync. + * + * Nobody uses this functionality right now because everybody marks the bh + * dirty and then uses fsync_buffers_list() to first flush them and then + * waits for completion on them. (see inode.c generic_osync_inode for more details) */ static int osync_buffers_list(struct list_head *list) {