Patch from Manfred Spraul Fixes the lock contention over file_list_lock by simply removing the unneeded anon_list. So filp allocation does not need to take a global lock any more. The use of a spinlock to protect files_stat.nr_files is a bit awkward, but the alternative is a custom sysctl handler, and isn't much more efficient anyway. fs/dcache.c | 3 - fs/file_table.c | 132 ++++++++++++++++++++++++++++++--------------------- include/linux/file.h | 3 + 3 files changed, 85 insertions(+), 53 deletions(-) diff -puN fs/dcache.c~file_list_lock-contention-fix fs/dcache.c --- 25/fs/dcache.c~file_list_lock-contention-fix 2003-03-13 02:40:32.000000000 -0800 +++ 25-akpm/fs/dcache.c 2003-03-13 02:40:32.000000000 -0800 @@ -24,6 +24,7 @@ #include #include #include +#include #include #define DCACHE_PARANOIA 1 @@ -1571,7 +1572,7 @@ void __init vfs_caches_init(unsigned lon filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor); if(!filp_cachep) panic("Cannot create filp SLAB cache"); diff -puN fs/file_table.c~file_list_lock-contention-fix fs/file_table.c --- 25/fs/file_table.c~file_list_lock-contention-fix 2003-03-13 02:40:32.000000000 -0800 +++ 25-akpm/fs/file_table.c 2003-03-13 03:55:25.000000000 -0800 @@ -22,12 +22,45 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* Here the new files go */ -static LIST_HEAD(anon_list); -/* And here the free ones sit */ +/* list of free filps for root */ static LIST_HEAD(free_list); /* public *and* exported. Not pretty! */ -spinlock_t files_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +spinlock_t __cacheline_aligned_in_smp files_lock = SPIN_LOCK_UNLOCKED; + +static spinlock_t filp_count_lock = SPIN_LOCK_UNLOCKED; + +/* slab constructors and destructors are called from arbitrary + * context and must be fully threaded - use a local spinlock + * to protect files_stat.nr_files + */ +void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags) +{ + if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + unsigned long flags; + spin_lock_irqsave(&filp_count_lock, flags); + files_stat.nr_files++; + spin_unlock_irqrestore(&filp_count_lock, flags); + } +} + +void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags) +{ + unsigned long flags; + spin_lock_irqsave(&filp_count_lock, flags); + files_stat.nr_files--; + spin_unlock_irqrestore(&filp_count_lock, flags); +} + +static inline void file_free(struct file* f) +{ + if (files_stat.nr_free_files <= NR_RESERVED_FILES) { + list_add(&f->f_list, &free_list); + files_stat.nr_free_files++; + } else { + kmem_cache_free(filp_cachep, f); + } +} /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or @@ -37,57 +70,48 @@ spinlock_t files_lock __cacheline_aligne */ struct file * get_empty_filp(void) { - static int old_max = 0; +static int old_max = 0; struct file * f; + if (likely(files_stat.nr_files < files_stat.max_files)) { + f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL); + if (f) { +got_one: + memset(f, 0, sizeof(*f)); + if (security_file_alloc(f)) { + file_list_lock(); + file_free(f); + file_list_unlock(); + + return NULL; + } + eventpoll_init_file(f); + atomic_set(&f->f_count,1); + f->f_uid = current->fsuid; + f->f_gid = current->fsgid; + f->f_owner.lock = RW_LOCK_UNLOCKED; + /* f->f_version, f->f_list.next: 0 */ + return f; + } + } + /* Use a reserved one if we're the superuser */ file_list_lock(); - if (files_stat.nr_free_files > NR_RESERVED_FILES) { - used_one: + if (files_stat.nr_free_files && !current->euid) { f = list_entry(free_list.next, struct file, f_list); list_del(&f->f_list); files_stat.nr_free_files--; - new_one: - memset(f, 0, sizeof(*f)); - if (security_file_alloc(f)) { - list_add(&f->f_list, &free_list); - files_stat.nr_free_files++; - file_list_unlock(); - return NULL; - } - eventpoll_init_file(f); - atomic_set(&f->f_count,1); - f->f_version = 0; - f->f_uid = current->fsuid; - f->f_gid = current->fsgid; - f->f_owner.lock = RW_LOCK_UNLOCKED; - list_add(&f->f_list, &anon_list); file_list_unlock(); - return f; + goto got_one; } - /* - * Use a reserved one if we're the superuser - */ - if (files_stat.nr_free_files && !current->euid) - goto used_one; - /* - * Allocate a new one if we're below the limit. - */ - if (files_stat.nr_files < files_stat.max_files) { - file_list_unlock(); - f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL); - file_list_lock(); - if (f) { - files_stat.nr_files++; - goto new_one; - } - /* Big problems... */ - printk(KERN_WARNING "VFS: filp allocation failed\n"); - - } else if (files_stat.max_files > old_max) { + file_list_unlock(); + /* Ran out of filps - report that */ + if (files_stat.max_files > old_max) { printk(KERN_INFO "VFS: file-max limit %d reached\n", files_stat.max_files); old_max = files_stat.max_files; + } else { + /* Big problems... */ + printk(KERN_WARNING "VFS: filp allocation failed\n"); } - file_list_unlock(); return NULL; } @@ -106,6 +130,7 @@ int init_private_file(struct file *filp, filp->f_uid = current->fsuid; filp->f_gid = current->fsgid; filp->f_op = dentry->d_inode->i_fop; + filp->f_list.next = NULL; if (filp->f_op->open) return filp->f_op->open(dentry->d_inode, filp); else @@ -143,9 +168,9 @@ void __fput(struct file * file) file_list_lock(); file->f_dentry = NULL; file->f_vfsmnt = NULL; - list_del(&file->f_list); - list_add(&file->f_list, &free_list); - files_stat.nr_free_files++; + if (file->f_list.next) + list_del(&file->f_list); + file_free(file); file_list_unlock(); dput(dentry); mntput(mnt); @@ -171,9 +196,9 @@ void put_filp(struct file *file) if(atomic_dec_and_test(&file->f_count)) { security_file_free(file); file_list_lock(); - list_del(&file->f_list); - list_add(&file->f_list, &free_list); - files_stat.nr_free_files++; + if (file->f_list.next) + list_del(&file->f_list); + file_free(file); file_list_unlock(); } } @@ -183,7 +208,8 @@ void file_move(struct file *file, struct if (!list) return; file_list_lock(); - list_del(&file->f_list); + if (file->f_list.next) + list_del(&file->f_list); list_add(&file->f_list, list); file_list_unlock(); } @@ -191,7 +217,9 @@ void file_move(struct file *file, struct void file_kill(struct file *file) { file_list_lock(); - list_del_init(&file->f_list); + if (file->f_list.next) + list_del(&file->f_list); + file->f_list.next = NULL; file_list_unlock(); } diff -puN include/linux/file.h~file_list_lock-contention-fix include/linux/file.h --- 25/include/linux/file.h~file_list_lock-contention-fix 2003-03-13 02:40:32.000000000 -0800 +++ 25-akpm/include/linux/file.h 2003-03-13 02:40:32.000000000 -0800 @@ -40,6 +40,9 @@ extern void FASTCALL(set_close_on_exec(u extern void put_filp(struct file *); extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); +struct kmem_cache_s; +extern void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags); +extern void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags); extern struct file ** alloc_fd_array(int); extern int expand_fd_array(struct files_struct *, int nr); _