From: Dipankar Sarma With the use of RCU in files structure, the look-up of files using fds can now be lock-free. The lookup is protected by rcu_read_lock()/rcu_read_unlock(). This patch changes the readers to use lock-free lookup. Signed-off-by: Maneesh Soni Signed-off-by: Ravikiran Thirumalai Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton --- arch/mips/kernel/irixioctl.c | 5 +++-- arch/sparc64/solaris/ioctl.c | 7 ++++--- drivers/char/tty_io.c | 4 ++-- fs/fcntl.c | 4 ++-- fs/proc/base.c | 29 +++++++++++++++-------------- fs/select.c | 13 ++++++++++--- net/ipv4/netfilter/ipt_owner.c | 17 +++++++++-------- net/ipv6/netfilter/ip6t_owner.c | 11 ++++++----- security/selinux/hooks.c | 2 +- 9 files changed, 52 insertions(+), 40 deletions(-) diff -puN arch/mips/kernel/irixioctl.c~files-lock-free-fd-look-up arch/mips/kernel/irixioctl.c --- devel/arch/mips/kernel/irixioctl.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/arch/mips/kernel/irixioctl.c 2005-07-08 23:11:50.000000000 -0700 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,7 @@ static struct tty_struct *get_tty(int fd struct file *filp; struct tty_struct *ttyp = NULL; - spin_lock(&current->files->file_lock); + rcu_read_lock(); filp = fcheck(fd); if(filp && filp->private_data) { ttyp = (struct tty_struct *) filp->private_data; @@ -41,7 +42,7 @@ static struct tty_struct *get_tty(int fd if(ttyp->magic != TTY_MAGIC) ttyp =NULL; } - spin_unlock(&current->files->file_lock); + rcu_read_unlock(); return ttyp; } diff -puN arch/sparc64/solaris/ioctl.c~files-lock-free-fd-look-up arch/sparc64/solaris/ioctl.c --- devel/arch/sparc64/solaris/ioctl.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/arch/sparc64/solaris/ioctl.c 2005-07-08 23:11:50.000000000 -0700 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -295,16 +296,16 @@ static inline int solaris_sockmod(unsign struct inode *ino; struct fdtable *fdt; 
/* I wonder which of these tests are superfluous... --patrik */ - spin_lock(&current->files->file_lock); + rcu_read_lock(); fdt = files_fdtable(current->files); if (! fdt->fd[fd] || ! fdt->fd[fd]->f_dentry || ! (ino = fdt->fd[fd]->f_dentry->d_inode) || ! S_ISSOCK(ino->i_mode)) { - spin_unlock(&current->files->file_lock); + rcu_read_unlock(); return TBADF; } - spin_unlock(&current->files->file_lock); + rcu_read_unlock(); switch (cmd & 0xff) { case 109: /* SI_SOCKPARAMS */ diff -puN drivers/char/tty_io.c~files-lock-free-fd-look-up drivers/char/tty_io.c --- devel/drivers/char/tty_io.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/drivers/char/tty_io.c 2005-07-08 23:11:50.000000000 -0700 @@ -2452,7 +2452,7 @@ static void __do_SAK(void *arg) } task_lock(p); if (p->files) { - spin_lock(&p->files->file_lock); + rcu_read_lock(); fdt = files_fdtable(p->files); for (i=0; i < fdt->max_fds; i++) { filp = fcheck_files(p->files, i); @@ -2467,7 +2467,7 @@ static void __do_SAK(void *arg) break; } } - spin_unlock(&p->files->file_lock); + rcu_read_unlock(); } task_unlock(p); } while_each_task_pid(session, PIDTYPE_SID, p); diff -puN fs/fcntl.c~files-lock-free-fd-look-up fs/fcntl.c --- devel/fs/fcntl.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/fs/fcntl.c 2005-07-08 23:11:50.000000000 -0700 @@ -40,10 +40,10 @@ static inline int get_close_on_exec(unsi struct files_struct *files = current->files; struct fdtable *fdt; int res; - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); res = FD_ISSET(fd, fdt->close_on_exec); - spin_unlock(&files->file_lock); + rcu_read_unlock(); return res; } diff -puN fs/proc/base.c~files-lock-free-fd-look-up fs/proc/base.c --- devel/fs/proc/base.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/fs/proc/base.c 2005-07-08 23:11:50.000000000 -0700 @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -274,16 +275,16 @@ static 
int proc_fd_link(struct inode *in files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); return 0; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } return -ENOENT; @@ -1058,7 +1059,7 @@ static int proc_readfd(struct file * fil files = get_files_struct(p); if (!files) goto out; - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (fd = filp->f_pos-2; fd < fdt->max_fds; @@ -1067,7 +1068,7 @@ static int proc_readfd(struct file * fil if (!fcheck_files(files, fd)) continue; - spin_unlock(&files->file_lock); + rcu_read_unlock(); j = NUMBUF; i = fd; @@ -1079,12 +1080,12 @@ static int proc_readfd(struct file * fil ino = fake_ino(tid, PROC_TID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - spin_lock(&files->file_lock); + rcu_read_lock(); break; } - spin_lock(&files->file_lock); + rcu_read_lock(); } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } out: @@ -1259,9 +1260,9 @@ static int tid_fd_revalidate(struct dent files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); if (fcheck_files(files, fd)) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { inode->i_uid = task->euid; @@ -1273,7 +1274,7 @@ static int tid_fd_revalidate(struct dent security_task_to_inode(task, inode); return 1; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } d_drop(dentry); @@ -1365,7 +1366,7 @@ static struct dentry *proc_lookupfd(stru if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -1373,7 +1374,7 @@ 
static struct dentry *proc_lookupfd(stru inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1383,7 +1384,7 @@ static struct dentry *proc_lookupfd(stru return NULL; out_unlock2: - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); out_unlock: iput(inode); diff -puN fs/select.c~files-lock-free-fd-look-up fs/select.c --- devel/fs/select.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/fs/select.c 2005-07-08 23:11:50.000000000 -0700 @@ -22,6 +22,7 @@ #include /* for STICKY_TIMEOUTS */ #include #include +#include #include @@ -185,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, l int retval, i; long __timeout = *timeout; - spin_lock(&current->files->file_lock); + rcu_read_lock(); retval = max_select_fd(n, fds); - spin_unlock(&current->files->file_lock); + rcu_read_unlock(); if (retval < 0) return retval; @@ -307,8 +308,10 @@ static int core_sys_select(int n, fd_set goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ + rcu_read_lock(); fdt = files_fdtable(current->files); max_fdset = fdt->max_fdset; + rcu_read_unlock(); if (n > max_fdset) n = max_fdset; @@ -565,10 +568,14 @@ int do_sys_poll(struct pollfd __user *uf struct poll_list *head; struct poll_list *walk; struct fdtable *fdt; + int max_fdset; /* Do a sanity check on nfds ... 
*/ + rcu_read_lock(); fdt = files_fdtable(current->files); - if (nfds > fdt->max_fdset && nfds > OPEN_MAX) + max_fdset = fdt->max_fdset; + rcu_read_unlock(); + if (nfds > max_fdset && nfds > OPEN_MAX) return -EINVAL; poll_initwait(&table); diff -puN net/ipv4/netfilter/ipt_owner.c~files-lock-free-fd-look-up net/ipv4/netfilter/ipt_owner.c --- devel/net/ipv4/netfilter/ipt_owner.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/net/ipv4/netfilter/ipt_owner.c 2005-07-08 23:11:50.000000000 -0700 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -36,18 +37,18 @@ match_comm(const struct sk_buff *skb, co task_lock(p); files = p->files; if(files) { - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (i=0; i < fdt->max_fds; i++) { if (fcheck_files(files, i) == skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); task_unlock(p); read_unlock(&tasklist_lock); return 1; } } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } task_unlock(p); } while_each_thread(g, p); @@ -70,18 +71,18 @@ match_pid(const struct sk_buff *skb, pid task_lock(p); files = p->files; if(files) { - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (i=0; i < fdt->max_fds; i++) { if (fcheck_files(files, i) == skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); task_unlock(p); read_unlock(&tasklist_lock); return 1; } } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } task_unlock(p); out: @@ -106,7 +107,7 @@ match_sid(const struct sk_buff *skb, pid task_lock(p); files = p->files; if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (i=0; i < fdt->max_fds; i++) { if (fcheck_files(files, i) == file) { @@ -114,7 +115,7 @@ match_sid(const struct sk_buff *skb, pid break; } } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } task_unlock(p); if (found) diff -puN 
net/ipv6/netfilter/ip6t_owner.c~files-lock-free-fd-look-up net/ipv6/netfilter/ip6t_owner.c --- devel/net/ipv6/netfilter/ip6t_owner.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/net/ipv6/netfilter/ip6t_owner.c 2005-07-08 23:11:50.000000000 -0700 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -35,17 +36,17 @@ match_pid(const struct sk_buff *skb, pid task_lock(p); files = p->files; if(files) { - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (i=0; i < fdt->max_fds; i++) { if (fcheck_files(files, i) == skb->sk->sk_socket->file) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); task_unlock(p); read_unlock(&tasklist_lock); return 1; } } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } task_unlock(p); out: @@ -70,7 +71,7 @@ match_sid(const struct sk_buff *skb, pid task_lock(p); files = p->files; if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); fdt = files_fdtable(files); for (i=0; i < fdt->max_fds; i++) { if (fcheck_files(files, i) == file) { @@ -78,7 +79,7 @@ match_sid(const struct sk_buff *skb, pid break; } } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } task_unlock(p); if (found) diff -puN security/selinux/hooks.c~files-lock-free-fd-look-up security/selinux/hooks.c --- devel/security/selinux/hooks.c~files-lock-free-fd-look-up 2005-07-08 23:11:50.000000000 -0700 +++ devel-akpm/security/selinux/hooks.c 2005-07-08 23:11:50.000000000 -0700 @@ -1736,7 +1736,7 @@ static inline void flush_unauthorized_fi continue; } if (devnull) { - atomic_inc(&devnull->f_count); + rcuref_inc(&devnull->f_count); } else { devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); if (!devnull) { _