From: Neil Brown Don't remove inode from hash until filesystem has deleted it. There is a small race with knfsd using iget to get an inode that is currently being deleted. This is because it is removed from the hash table *before* the filesystem gets to delete it. If nfsd does an iget in this window it will cause a read_inode which will return an apparently valid inode. However that inode will shortly be deleted from disc without knfsd noticing... until it is too late. With this patch, the inode being deleted is left on the hash table, and if a lookup finds an inode being freed in the hash table, it waits in the inode waitqueue for the inode to be fully deleted. fs/fs-writeback.c | 3 ++- fs/inode.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff -puN fs/fs-writeback.c~inode-unhashing-fix-2 fs/fs-writeback.c --- 25/fs/fs-writeback.c~inode-unhashing-fix-2 2003-05-11 21:39:52.000000000 -0700 +++ 25-akpm/fs/fs-writeback.c 2003-05-11 21:39:52.000000000 -0700 @@ -90,7 +90,8 @@ void __mark_inode_dirty(struct inode *in * Only add valid (hashed) inodes to the superblock's * dirty list. Add blockdev inodes as well. */ - if (hlist_unhashed(&inode->i_hash) && !S_ISBLK(inode->i_mode)) + if ((hlist_unhashed(&inode->i_hash) || (inode->i_state & (I_FREEING|I_CLEAR))) + && !S_ISBLK(inode->i_mode)) goto out; /* diff -puN fs/inode.c~inode-unhashing-fix-2 fs/inode.c --- 25/fs/inode.c~inode-unhashing-fix-2 2003-05-11 21:39:52.000000000 -0700 +++ 25-akpm/fs/inode.c 2003-05-11 21:39:52.000000000 -0700 @@ -466,6 +466,7 @@ static int shrink_icache_memory(int nr, return inodes_stat.nr_unused; } +void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. 
* NOTE: we are not increasing the inode-refcount, you must call __iget() @@ -477,6 +478,7 @@ static struct inode * find_inode(struct struct hlist_node *node; struct inode * inode = NULL; +repeat: hlist_for_each (node, head) { prefetch(node->next); inode = hlist_entry(node, struct inode, i_hash); @@ -484,6 +486,10 @@ static struct inode * find_inode(struct continue; if (!test(inode, data)) continue; + if (inode->i_state & (I_FREEING|I_CLEAR)) { + __wait_on_freeing_inode(inode); + goto repeat; + } break; } return node ? inode : NULL; @@ -498,6 +504,7 @@ static struct inode * find_inode_fast(st struct hlist_node *node; struct inode * inode = NULL; +repeat: hlist_for_each (node, head) { prefetch(node->next); inode = list_entry(node, struct inode, i_hash); @@ -505,6 +512,10 @@ static struct inode * find_inode_fast(st continue; if (inode->i_sb != sb) continue; + if (inode->i_state & (I_FREEING|I_CLEAR)) { + __wait_on_freeing_inode(inode); + goto repeat; + } break; } return node ? inode : NULL; @@ -937,7 +948,6 @@ void generic_delete_inode(struct inode * { struct super_operations *op = inode->i_sb->s_op; - hlist_del_init(&inode->i_hash); list_del_init(&inode->i_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; @@ -956,6 +966,10 @@ void generic_delete_inode(struct inode * delete(inode); } else clear_inode(inode); + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); + wake_up_inode(inode); if (inode->i_state != I_CLEAR) BUG(); destroy_inode(inode); @@ -1229,6 +1243,19 @@ repeat: __set_current_state(TASK_RUNNING); } +void __wait_on_freeing_inode(struct inode *inode) +{ + DECLARE_WAITQUEUE(wait, current); + wait_queue_head_t *wq = i_waitq_head(inode); + + add_wait_queue(wq, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&inode_lock); + schedule(); + remove_wait_queue(wq, &wait); + spin_lock(&inode_lock); +} + void wake_up_inode(struct inode *inode) { wait_queue_head_t *wq = i_waitq_head(inode); _