From: Andrey Borzenkov Oops is due to concurrent d_instantiate on the same dentry; the bug was unfortunately quite ugly to fix inside devfs itself. The attached patch makes sure d_revalidate is always called under parent i_sem allowing it to drop and reacquire semaphore before going to wait. It provides both mutual exclusion with devfs_lookup and between d_revalidate, fixing - this bug; unfortunately I do not know how to reproduce it on purpose. It apparently needs at least true SMP that I do not have. We need two d_revalidate's against the same dentry running concurrently - old devfs_lookup/d_revalidate deadlock (which has been fixed a bit - theoretically possible problem when dentry->d_op is changed after d_op->d_revalidate has been tested resulting in NULL pointer dereferencing (if (dentry->d_op->d_revalidate) dentry->d_op->d_revalidate). I am not even sure if it is really possible. Pavel, you have been lucky in cathing devfs bugs, could you please test this if it works for you? I appreciate comments about fs/namei.c changes. I tried to make them as non-intrusive as possible. Believe me - it is the most simple way to close devfs races. With this resolved we can start cleaning devfs; my final goal is to autoremove unneeded path components and get rid of devfs_name alltogether. Now when every driver has kernel name it is enough to register using this one letting devfsd to do the rest. fs/devfs/base.c | 62 ++++++++++++++++++++++++----------------------------- fs/namei.c | 21 +++++++++++++++-- include/linux/fs.h | 1 3 files changed, 48 insertions(+), 36 deletions(-) diff -puN fs/devfs/base.c~devfs-d_revalidate-oops-fix fs/devfs/base.c --- 25/fs/devfs/base.c~devfs-d_revalidate-oops-fix 2004-01-04 13:43:13.000000000 -0800 +++ 25-akpm/fs/devfs/base.c 2004-01-04 13:43:13.000000000 -0800 @@ -2171,17 +2171,9 @@ static void devfs_d_iput (struct dentry } /* End Function devfs_d_iput */ static int devfs_d_delete (struct dentry *dentry); - -static struct dentry_operations devfs_dops = -{ - .d_delete = devfs_d_delete, - .d_release = devfs_d_release, - .d_iput = devfs_d_iput, -}; - static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *); -static struct dentry_operations devfs_wait_dops = +static struct dentry_operations devfs_dops = { .d_delete = devfs_d_delete, .d_release = devfs_d_release, @@ -2198,8 +2190,8 @@ static int devfs_d_delete (struct dentry { struct inode *inode = dentry->d_inode; - if (dentry->d_op == &devfs_wait_dops) dentry->d_op = &devfs_dops; /* Unhash dentry if negative (has no inode) */ + /* FIXME should we check for d_fsdata? */ if (inode == NULL) { DPRINTK (DEBUG_D_DELETE, "(%p): dropping negative dentry\n", dentry); @@ -2216,6 +2208,11 @@ struct devfs_lookup_struct /* XXX: this doesn't handle the case where we got a negative dentry but a devfs entry has been registered in the meanwhile */ +/* + * This relies on the fact that d_revalidate is called under parent i_sem + * providing mutual exclusion with devfs_lookup and protection for + * dentry->d_fsdata + */ static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; @@ -2224,6 +2221,10 @@ static int devfs_d_revalidate_wait (stru struct devfs_lookup_struct *lookup_info = dentry->d_fsdata; DECLARE_WAITQUEUE (wait, current); + /* Ordinary case - nothing to revalidate */ + if (lookup_info == NULL) return 1; /* Early termination */ + + /* Called from devfsd action - cannot block. */ if ( is_devfsd_or_child (fs_info) ) { devfs_handle_t de = lookup_info->de; @@ -2252,25 +2253,22 @@ static int devfs_d_revalidate_wait (stru d_instantiate (dentry, inode); return 1; } - if (lookup_info == NULL) return 1; /* Early termination */ - read_lock (&parent->u.dir.lock); - if (dentry->d_fsdata) - { - set_current_state (TASK_UNINTERRUPTIBLE); - add_wait_queue (&lookup_info->wait_queue, &wait); - read_unlock (&parent->u.dir.lock); - schedule (); - /* - * This does not need nor should remove wait from wait_queue. - * Wait queue head is never reused - nothing is ever added to it - * after all waiters have been waked up and head itself disappears - * very soon after it. Moreover it is local variable on stack that - * is likely to have already disappeared so any reference to it - * at this point is buggy. - */ - } - else read_unlock (&parent->u.dir.lock); + /* Not devfsd - must wait for devfsd to return */ + set_current_state (TASK_UNINTERRUPTIBLE); + add_wait_queue (&lookup_info->wait_queue, &wait); + up(&dir->i_sem); + schedule (); + down(&dir->i_sem); + /* + * This does not need nor should remove wait from wait_queue. + * Wait queue head is never reused - nothing is ever added to it + * after all waiters have been waked up and head itself disappears + * very soon after it. Moreover it is local variable on stack that + * is likely to have already disappeared so any reference to it + * at this point is buggy. + */ + return 1; } /* End Function devfs_d_revalidate_wait */ @@ -2309,17 +2307,18 @@ static struct dentry *devfs_lookup (stru if (try_modload (parent, fs_info, dentry->d_name.name, dentry->d_name.len, &tmp) < 0) { /* Lookup event was not queued to devfsd */ + dentry->d_fsdata = NULL; d_add (dentry, NULL); return NULL; } } - dentry->d_op = &devfs_wait_dops; d_add (dentry, NULL); /* Open the floodgates */ /* Unlock directory semaphore, which will release any waiters. They will get the hashed dentry, and may be forced to wait for revalidation */ up (&dir->i_sem); wait_for_devfsd_finished (fs_info); /* If I'm not devfsd, must wait */ + down (&dir->i_sem); /* Grab it again because them's the rules */ de = lookup_info.de; /* If someone else has been so kind as to make the inode, we go home early */ @@ -2344,12 +2343,8 @@ static struct dentry *devfs_lookup (stru de->name, de->inode.ino, inode, de, current->comm); d_instantiate (dentry, inode); out: - write_lock (&parent->u.dir.lock); - dentry->d_op = &devfs_dops; dentry->d_fsdata = NULL; wake_up (&lookup_info.wait_queue); - write_unlock (&parent->u.dir.lock); - down (&dir->i_sem); /* Grab it again because them's the rules */ devfs_put (de); return retval; } /* End Function devfs_lookup */ @@ -2592,6 +2587,7 @@ static struct file_system_type devfs_fs_ .name = DEVFS_NAME, .get_sb = devfs_get_sb, .kill_sb = kill_anon_super, + .fs_flags = FS_ODD_REVALIDATE, }; /* File operations for devfsd follow */ diff -puN fs/namei.c~devfs-d_revalidate-oops-fix fs/namei.c --- 25/fs/namei.c~devfs-d_revalidate-oops-fix 2004-01-04 13:43:13.000000000 -0800 +++ 25-akpm/fs/namei.c 2004-01-04 13:43:13.000000000 -0800 @@ -274,6 +274,9 @@ void path_release(struct nameidata *nd) * Internal lookup() using the new generic dcache. * SMP-safe */ +/* + * This seems to always be called under parent->i_sem locked + */ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * dentry = __d_lookup(parent, name); @@ -342,6 +345,7 @@ static struct dentry * real_lookup(struc { struct dentry * result; struct inode *dir = parent->d_inode; + int needlock = dir->i_sb->s_type->fs_flags & FS_ODD_REVALIDATE; down(&dir->i_sem); /* @@ -377,13 +381,16 @@ static struct dentry * real_lookup(struc * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ - up(&dir->i_sem); + if (!needlock) + up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); result = ERR_PTR(-ENOENT); } } + if (needlock) + up(&dir->i_sem); return result; } @@ -528,11 +535,17 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); + int needlock = mnt->mnt_sb->s_type->fs_flags & FS_ODD_REVALIDATE; if (!dentry) goto need_lookup; + if (needlock) + down(&nd->dentry->d_inode->i_sem); if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; +unlock: + if (needlock) + up(&nd->dentry->d_inode->i_sem); done: path->mnt = mnt; path->dentry = dentry; @@ -546,9 +559,11 @@ need_lookup: need_revalidate: if (dentry->d_op->d_revalidate(dentry, nd)) - goto done; + goto unlock; if (d_invalidate(dentry)) - goto done; + goto unlock; + if (needlock) + up(&nd->dentry->d_inode->i_sem); dput(dentry); goto need_lookup; diff -puN include/linux/fs.h~devfs-d_revalidate-oops-fix include/linux/fs.h --- 25/include/linux/fs.h~devfs-d_revalidate-oops-fix 2004-01-04 13:43:13.000000000 -0800 +++ 25-akpm/include/linux/fs.h 2004-01-04 13:43:13.000000000 -0800 @@ -93,6 +93,7 @@ extern int leases_enable, dir_notify_ena #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon * as nfs_rename() will be cleaned up */ +#define FS_ODD_REVALIDATE (1<<16) /* d_revalidate needs lock on i_sem */ /* * These are the fs-independent mount-flags: up to 32 flags are supported */ _