From: Maneesh Soni Here is a patch to use seqlock for real_lookup race with d_lookup as suggested by Linus. The race condition can result in duplicate dentry when d_lookup fails due concurrent d_move in some unrelated directory. Apart from real_lookup, lookup_hash()->cached_lookup() can also fail due to same reason. So, for that I am doing the d_lookup again. Now we have __d_lookup (called from do_lookup() during pathwalk) and d_lookup which uses seqlock to protect againt rename race. dcachebench numbers (lower is better) don't have much difference on a 4-way PIII xeon SMP box. base-2565 Average usec/iteration 19059.4 Standard Deviation 503.07 base-2565 + seq_lock Average usec/iteration 18843.2 Standard Deviation 450.57 fs/dcache.c | 23 ++++++++++++++++++++++- fs/namei.c | 17 ++++++++++------- include/linux/dcache.h | 1 + 3 files changed, 33 insertions(+), 8 deletions(-) diff -puN fs/dcache.c~real_lookup-race-fix fs/dcache.c --- 25/fs/dcache.c~real_lookup-race-fix 2003-03-28 02:52:33.000000000 -0800 +++ 25-akpm/fs/dcache.c 2003-03-28 02:52:33.000000000 -0800 @@ -27,12 +27,14 @@ #include #include #include +#include #define DCACHE_PARANOIA 1 /* #define DCACHE_DEBUG 1 */ spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; rwlock_t dparent_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED; +seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; static kmem_cache_t *dentry_cache; @@ -926,7 +928,7 @@ struct dentry *d_splice_alias(struct ino * is returned. The caller must use d_put to free the entry when it has * finished using it. %NULL is returned on failure. * - * d_lookup is now, dcache_lock free. The hash list is protected using RCU. + * __d_lookup is dcache_lock free. The hash list is protected using RCU. * Memory barriers are used while updating and doing lockless traversal. * To avoid races with d_move while rename is happening, d_move_count is * used. @@ -939,10 +941,27 @@ struct dentry *d_splice_alias(struct ino * * d_lru list is not updated, which can leave non-zero d_count dentries * around in d_lru list. + * + * d_lookup() is protected against the concurrent renames in some unrelated + * directory using the seqlockt_t rename_lock. */ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { + struct dentry * dentry = NULL; + unsigned long seq; + + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) + break; + } while (read_seqretry(&rename_lock, seq)); + return dentry; +} + +struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) +{ unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; @@ -1185,6 +1204,7 @@ void d_move(struct dentry * dentry, stru printk(KERN_WARNING "VFS: moving negative dcache entry\n"); spin_lock(&dcache_lock); + write_seqlock(&rename_lock); spin_lock(&dentry->d_lock); /* Move the dentry to the target hash queue, if on different bucket */ @@ -1222,6 +1242,7 @@ void d_move(struct dentry * dentry, stru list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); dentry->d_move_count++; spin_unlock(&dentry->d_lock); + write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); } diff -puN fs/namei.c~real_lookup-race-fix fs/namei.c --- 25/fs/namei.c~real_lookup-race-fix 2003-03-28 02:52:33.000000000 -0800 +++ 25-akpm/fs/namei.c 2003-03-28 02:52:33.000000000 -0800 @@ -275,8 +275,14 @@ void path_release(struct nameidata *nd) */ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) { - struct dentry * dentry = d_lookup(parent, name); - + struct dentry * dentry = __d_lookup(parent, name); + + /* lockess __d_lookup may fail due to concurrent d_move() + * in some unrelated directory, so try with d_lookup + */ + if (!dentry) + dentry = d_lookup(parent, name); + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { dput(dentry); @@ -348,12 +354,9 @@ static struct dentry * real_lookup(struc * negatives from the RCU list walk here, unlike the optimistic * fast walk). * - * We really should do a sequence number thing to avoid this - * all. + * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup */ - spin_lock(&dcache_lock); result = d_lookup(parent, name); - spin_unlock(&dcache_lock); if (!result) { struct dentry * dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); @@ -524,7 +527,7 @@ static int do_lookup(struct nameidata *n struct path *path, int flags) { struct vfsmount *mnt = nd->mnt; - struct dentry *dentry = d_lookup(nd->dentry, name); + struct dentry *dentry = __d_lookup(nd->dentry, name); if (!dentry) goto need_lookup; diff -puN include/linux/dcache.h~real_lookup-race-fix include/linux/dcache.h --- 25/include/linux/dcache.h~real_lookup-race-fix 2003-03-28 02:52:33.000000000 -0800 +++ 25-akpm/include/linux/dcache.h 2003-03-28 02:52:33.000000000 -0800 @@ -238,6 +238,7 @@ extern void d_move(struct dentry *, stru /* appendix may either be NULL or be used for transname suffixes */ extern struct dentry * d_lookup(struct dentry *, struct qstr *); +extern struct dentry * __d_lookup(struct dentry *, struct qstr *); /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); _