# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#   ChangeSet 1.658 -> 1.689
#   fs/nfsd/nfs3xdr.c 1.12 -> 1.13
#   include/asm-i386/types.h 1.2 -> 1.4
#   fs/autofs/dirhash.c 1.2 -> 1.3
#   include/linux/init_task.h 1.11 -> 1.13
#   include/linux/sched.h 1.61 -> 1.65
#   kernel/fork.c 1.44 -> 1.50
#   kernel/sys.c 1.18 -> 1.23
#   kernel/capability.c 1.2 -> 1.3
#   init/main.c 1.46 -> 1.48
#   kernel/Makefile 1.9 -> 1.11
#   kernel/sched.c 1.75 -> 1.80
#   kernel/timer.c 1.9 -> 1.10
#   kernel/kmod.c 1.8 -> 1.9
#   kernel/exit.c 1.33 -> 1.40
#   kernel/user.c 1.1 -> 1.2
#   kernel/signal.c 1.17 -> 1.18
#   drivers/char/tty_io.c 1.25 -> 1.29
#   include/asm-i386/bitops.h 1.12 -> 1.13
#   fs/nfs/nfs3xdr.c 1.5 -> 1.6
#   (new) -> 1.1 arch/i386/kernel/gdbstub.c
#   (new) -> 1.1 Documentation/i386/gdb-serial.txt
#   (new) -> 1.1 arch/i386/kernel/gdbstart.c
#   (new) -> 1.4 include/linux/pid.h
#   (new) -> 1.1 arch/i386/kernel/gdbstart
#   (new) -> 1.1 include/linux/gdb.h
#   (new) -> 1.5 include/linux/idtag.h
#   (new) -> 1.1 drivers/char/gdbserial.c
#   (new) -> 1.6 kernel/pid.c
#   (new) -> 1.5 kernel/idtag.c
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/05/24 wli@holomorphy.com 1.659
#   Some compile fixes, some extraneous kgdb files lingering about, and
#   initial idtag support.
# --------------------------------------------
# 02/05/24 wli@holomorphy.com 1.660
#   Bootstrap ordering fixes and attaching tags for more kinds of IDs.
# --------------------------------------------
# 02/05/24 wli@holomorphy.com 1.661
#   Minor updates for testing and small tidbits of additional functionality.
# --------------------------------------------
# 02/05/24 wli@elm3b52.eng.beaverton.ibm.com 1.662
#   O(1) count_active_tasks().
# --------------------------------------------
# 02/05/24 wli@elm3b52.eng.beaverton.ibm.com 1.663
#   sched.c:
#     Correct the load average calculation by not incrementing
#     ->nr_uninterruptible if a task is a freshly forked child or if the
#     branch activating a task during wakeup is not taken.
# --------------------------------------------
# 02/05/24 wli@elm3b52.eng.beaverton.ibm.com 1.664
#   Convert session_of_pgrp() to idtags and add a list-walking helper.
# --------------------------------------------
# 02/05/24 wli@holomorphy.com 1.665
#   sys.c:
#     Convert sys_setpgid() to idtags.
# --------------------------------------------
# 02/05/24 wli@holomorphy.com 1.666
#   signal.c:
#     Convert kill_pg_info() and kill_sl_info() to idtags.
#   exit.c:
#     Convert will_become_orphaned_pgrp() to idtags.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.667
#   exit.c:
#     Convert has_stopped_jobs() to idtags.  Eventually keep a counter in
#     the process group leader for this.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.668
#   exit.c:
#     Fix forget_original_parent().
#   fork.c:
#     Initialize children's real_sibling and real_parent.
#   init_task.h:
#     Initialize init_task's real_sibling and real_children.
#   sched.h:
#     Add real_children and real_sibling to task_t for
#     forget_original_parent().
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.669
#   init_task.h:
#     Initialize user_struct links in task for init_task.
#   sys.c:
#     Fix sys_setpriority() and sys_getpriority().
#   kmod.c:
#     Move usermodehelper tasks between struct user_structs.
#   fork.c:
#     Add children to struct user_struct.
#   exit.c:
#     Remove tasks from struct user_struct.
#   user.c:
#     Properly initialize lists of processes for struct user_structs.
#   sched.h:
#     Link tasks into struct user_struct.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.670
#   exit.c:
#     Fix a typo resulting in use of an uninitialized variable.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.671
#   idtag.h:
#     Export free_pid() and alloc_pid().
#   types.h:
#     Add LONG_SHIFT.
#   bitops.h:
#     Add lg().
#   sched.c:
#     Add call to pid_init().
#   idtag.c:
#     Call free_pid() when the last reference to a pid goes away.
#   fork.c:
#     Fix get_pid().
#   pid.h, pid.c:
#     new file
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.672
#   capability.c:
#     Fix cap_set_pg().
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.673
#   Makefile:
#     Add pid.o to the list of objects to build.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.674
#   tty_io.c:
#     Make do_tty_hangup() use idtags.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.675
#   tty_io.c:
#     Convert disassociate_ctty() to idtags.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.676
#   tty_io.c:
#     Convert tiocsctty() to idtags.
# --------------------------------------------
# 02/05/25 wli@holomorphy.com 1.677
#   tty_io.c:
#     Convert release_dev() to idtags.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.678
#   pid.c:
#     Fix a deadlock in the pid space exhaustion case arising from failure
#     to release the pid_lock prior to return from alloc_pid().
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.679
#   pid.c:
#     Cleanup of gotos in alloc_pid(), suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.680
#   pid.h:
#     Cleanup of PID_H to _LINUX_PID_H and addition of copyright
#     attribution, suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.681
#   main.c:
#     Remove __init from the extern decl of idtag_init(), as suggested by hch.
#   pid.c:
#     Some commentary and copyright attribution, as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.682
#   pid.c:
#     Change uses of LONG_SHIFT to BITS_PER_LONG_SHIFT.
#   types.h:
#     Cleanup of LONG_SHIFT to BITS_PER_LONG_SHIFT, as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.683
#   pid.c:
#     Cleanup of multiple return paths in alloc_pid(), as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.684
#   fork.c:
#     Eliminate the get_pid() inline in favor of calling alloc_pid()
#     directly in fork(), as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.685
#   idtag.h:
#     Use _LINUX_IDTAG_H instead of _IDTAG_H and add a copyright
#     attribution, as suggested by hch.
#   idtag.c:
#     Copyright attribution and brief documentation, as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.686
#   pid.h:
#     Change uses of LONG_SHIFT to BITS_PER_LONG_SHIFT, as suggested by hch.
# --------------------------------------------
# 02/05/26 wli@holomorphy.com 1.687
#   pid.h:
#     Change the definition of PID_MAP_DEPTH to use ARRAY_SIZE.
# -------------------------------------------- # 02/05/26 wli@holomorphy.com 1.688 # sched.c: # Clean up extern __init decls, as suggested by hch. Maybe these callees should be moved to init/main.c # -------------------------------------------- # 02/05/26 wli@holomorphy.com 1.689 # fork.c: # Fix use of uninitialized flags to clone_flags as was previously passed to get_pid(). # -------------------------------------------- # diff -Nru a/drivers/char/tty_io.c b/drivers/char/tty_io.c --- a/drivers/char/tty_io.c Sun May 26 11:51:18 2002 +++ b/drivers/char/tty_io.c Sun May 26 11:51:18 2002 @@ -431,8 +431,9 @@ { struct tty_struct *tty = (struct tty_struct *) data; struct file * cons_filp = NULL; - struct task_struct *p; - struct list_head *l; + task_t *task; + list_t *l, *elem; + struct idtag *idtag; int closecount = 0, n; if (!tty) @@ -495,19 +496,31 @@ } } + if (tty->session <= 0) + goto breakout; + read_lock(&tasklist_lock); - for_each_task(p) { - if ((tty->session > 0) && (p->session == tty->session) && - p->leader) { - send_sig(SIGHUP,p,1); - send_sig(SIGCONT,p,1); - if (tty->pgrp > 0) - p->tty_old_pgrp = tty->pgrp; - } - if (p->tty == tty) - p->tty = NULL; + idtag = find_tag(IDTAG_SID, tty->session); + if (!idtag) + goto breakout_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + + if (task->tty == tty) + task->tty = NULL; + + if (!task->leader) + continue; + + send_sig(SIGHUP, task, 1); + send_sig(SIGCONT, task, 1); + if (tty->pgrp > 0) + task->tty_old_pgrp = tty->pgrp; } +breakout_unlock: read_unlock(&tasklist_lock); +breakout: tty->flags = 0; tty->session = 0; @@ -569,7 +582,9 @@ void disassociate_ctty(int on_exit) { struct tty_struct *tty = current->tty; - struct task_struct *p; + task_t *task; + list_t *elem; + struct idtag *idtag; int tty_pgrp = -1; lock_kernel(); @@ -597,9 +612,16 @@ tty->pgrp = -1; read_lock(&tasklist_lock); - for_each_task(p) - if (p->session == current->session) - p->tty = NULL; + idtag = find_tag(IDTAG_SID, current->session); + + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + task->tty = NULL; + } +out_unlock: read_unlock(&tasklist_lock); unlock_kernel(); } @@ -1219,13 +1241,29 @@ * tty. Also, clear redirect if it points to either tty. */ if (tty_closing || o_tty_closing) { - struct task_struct *p; + task_t *task; + list_t *elem; + struct idtag *idtag; read_lock(&tasklist_lock); - for_each_task(p) { - if (p->tty == tty || (o_tty && p->tty == o_tty)) - p->tty = NULL; + idtag = find_tag(IDTAG_SID, tty->session); + if (!idtag) + goto detach_o_tty; + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + task->tty = NULL; + } +detach_o_tty: + if (!o_tty) + goto out_unlock; + idtag = find_tag(IDTAG_SID, o_tty->session); + if (!idtag) + goto out_unlock; + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + task->tty = NULL; } +out_unlock: read_unlock(&tasklist_lock); if (redirect == tty || (o_tty && redirect == o_tty)) @@ -1541,6 +1579,10 @@ static int tiocsctty(struct tty_struct *tty, int arg) { + task_t *task; + list_t *elem; + struct idtag *idtag; + if (current->leader && (current->session == tty->session)) return 0; @@ -1550,25 +1592,32 @@ */ if (!current->leader || current->tty) return -EPERM; - if (tty->session > 0) { + if (tty->session <= 0) + goto out_no_detach; + + /* + * This tty is already the controlling + * tty for another session group! 
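Each of the four tty conversions in this file (changesets 1.674-1.677) replaces a for_each_task() sweep with the same three-step walk: find_tag() the session, iterate the tag's task_list, and recover the task with idtag_task(). A minimal sketch of that pattern in isolation; the helper name is hypothetical, everything else is the patch's own API:

    /*
     * Detach a tty from every task in its session.  Caller holds
     * tasklist_lock for reading, as at the converted sites above.
     */
    static void detach_tty_from_session(struct tty_struct *tty)
    {
            task_t *task;
            list_t *elem;
            struct idtag *idtag;

            idtag = find_tag(IDTAG_SID, tty->session);
            if (!idtag)
                    return;         /* no task carries this session ID */

            list_for_each(elem, &idtag->task_list) {
                    task = idtag_task(elem, IDTAG_SID);
                    task->tty = NULL;
            }
    }

The point of the conversion is that the walk is now proportional to the size of the session, not to the number of tasks in the system.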
+ */ + if ((arg == 1) && capable(CAP_SYS_ADMIN)) { /* - * This tty is already the controlling - * tty for another session group! + * Steal it away */ - if ((arg == 1) && capable(CAP_SYS_ADMIN)) { - /* - * Steal it away - */ - struct task_struct *p; - - read_lock(&tasklist_lock); - for_each_task(p) - if (p->tty == tty) - p->tty = NULL; - read_unlock(&tasklist_lock); - } else - return -EPERM; - } + + read_lock(&tasklist_lock); + idtag = find_tag(IDTAG_SID, tty->session); + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + task->tty = NULL; + } +out_unlock: + read_unlock(&tasklist_lock); + } else + return -EPERM; +out_no_detach: task_lock(current); current->tty = tty; task_unlock(current); diff -Nru a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c --- a/fs/autofs/dirhash.c Sun May 26 11:51:18 2002 +++ b/fs/autofs/dirhash.c Sun May 26 11:51:18 2002 @@ -10,6 +10,8 @@ * * ------------------------------------------------------------------------- */ +#include +#include #include #include #include "autofs_i.h" diff -Nru a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c --- a/fs/nfs/nfs3xdr.c Sun May 26 11:51:18 2002 +++ b/fs/nfs/nfs3xdr.c Sun May 26 11:51:18 2002 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff -Nru a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c --- a/fs/nfsd/nfs3xdr.c Sun May 26 11:51:18 2002 +++ b/fs/nfsd/nfs3xdr.c Sun May 26 11:51:18 2002 @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include diff -Nru a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h --- a/include/asm-i386/bitops.h Sun May 26 11:51:18 2002 +++ b/include/asm-i386/bitops.h Sun May 26 11:51:18 2002 @@ -401,6 +401,20 @@ } /** + * lg - integer logarithm base 2 + * @n - integer to take log base 2 of + * + * undefined if 0 + */ +static inline unsigned long lg(unsigned long n) +{ + asm("bsrl %1,%0" + :"=r" (n) + :"r" (n)); + return n; +} + +/** * __ffs - find first bit in word. * @word: The word to search * diff -Nru a/include/asm-i386/types.h b/include/asm-i386/types.h --- a/include/asm-i386/types.h Sun May 26 11:51:18 2002 +++ b/include/asm-i386/types.h Sun May 26 11:51:18 2002 @@ -41,7 +41,8 @@ typedef signed long long s64; typedef unsigned long long u64; -#define BITS_PER_LONG 32 +#define BITS_PER_LONG_SHIFT 5 +#define BITS_PER_LONG (1 << BITS_PER_LONG_SHIFT) /* DMA addresses come in generic and 64-bit flavours. */ diff -Nru a/include/linux/idtag.h b/include/linux/idtag.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/include/linux/idtag.h Sun May 26 11:51:18 2002 @@ -0,0 +1,64 @@ +#ifndef _LINUX_IDTAG_H +#define _LINUX_IDTAG_H + +/* + * idtag definitions to support direct access to tasks sharing ID's + * (C) 2002 William Irwin, IBM + */ + +#include + +struct task_struct; + +enum idtag_type +{ + IDTAG_PID, + IDTAG_PGID, + IDTAG_SID, + IDTAG_TGID, + IDTAG_MAX +}; + +struct idtag +{ + unsigned long tag; + enum idtag_type type; + atomic_t count; + list_t idtag_hash_chain; + list_t task_list; +}; + +struct idtag_link +{ + unsigned long tag; + list_t idtag_chain; + struct idtag *idtag; +}; + +#define idtag_task(elem, tagtype) \ + list_entry(elem, struct task_struct, idtags[tagtype].idtag_chain) + +/* + * attach_tag() must be called without the tasklist_lock. + */ +extern int FASTCALL(attach_tag(struct task_struct *, + enum idtag_type, + unsigned long)); + + +/* + * detach_tag() must be called with the tasklist_lock held. 
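The handful of declarations in this header are the whole surface the rest of the patch builds on. A sketch of the intended lifecycle, mirroring what the fork.c and exit.c hunks below actually do; the two wrapper names are hypothetical, and error handling is elided just as the fork.c hunk elides it (attach_tag() can fail with -ENOMEM):

    /* On creation: publish the new task's IDs so signal delivery,
     * tty hangup etc. can find it through the tag hash. */
    static void publish_ids(task_t *p)
    {
            attach_tag(p, IDTAG_PID,  p->pid);
            attach_tag(p, IDTAG_PGID, p->pgrp);
            attach_tag(p, IDTAG_SID,  p->session);
            attach_tag(p, IDTAG_TGID, p->tgid);
    }

    /* On exit: unpublish them again, dropping each tag's refcount
     * and freeing the tag (and possibly the pid) on last use. */
    static void unpublish_ids(task_t *p)
    {
            detach_tag(p, IDTAG_PID);
            detach_tag(p, IDTAG_PGID);
            detach_tag(p, IDTAG_SID);
            detach_tag(p, IDTAG_TGID);
    }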
+ */ +extern int FASTCALL(detach_tag(struct task_struct *task, enum idtag_type)); + +/* + * Quick & dirty hash table lookup. + */ +extern struct idtag *FASTCALL(find_tag(enum idtag_type, unsigned long)); + +extern int FASTCALL(idtag_unused(unsigned long)); + +extern int alloc_pid(void); +extern void free_pid(unsigned long); + +#endif /* _LINUX_IDTAG_H */ diff -Nru a/include/linux/init_task.h b/include/linux/init_task.h --- a/include/linux/init_task.h Sun May 26 11:51:18 2002 +++ b/include/linux/init_task.h Sun May 26 11:51:18 2002 @@ -58,6 +58,9 @@ parent: &tsk, \ children: LIST_HEAD_INIT(tsk.children), \ sibling: LIST_HEAD_INIT(tsk.sibling), \ + real_children: LIST_HEAD_INIT(tsk.real_children), \ + real_sibling: LIST_HEAD_INIT(tsk.real_sibling), \ + user_task_list: LIST_HEAD_INIT(tsk.user_task_list), \ thread_group: LIST_HEAD_INIT(tsk.thread_group), \ wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\ real_timer: { \ diff -Nru a/include/linux/pid.h b/include/linux/pid.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/include/linux/pid.h Sun May 26 11:51:18 2002 @@ -0,0 +1,27 @@ +#ifndef _LINUX_PID_H +#define _LINUX_PID_H + +/* + * (C) 2002 William Irwin, IBM + * + * supporting macros for pid allocation + */ + +#define PID_MAX 0x8000 +#define RESERVED_PIDS 300 + +#define MAP0_SIZE (PID_MAX >> BITS_PER_LONG_SHIFT) +#define MAP1_SIZE (MAP0_SIZE >> BITS_PER_LONG_SHIFT) +#define MAP2_SIZE (MAP1_SIZE >> BITS_PER_LONG_SHIFT) + +#define MAP0_SHIFT BITS_PER_LONG_SHIFT +#define MAP1_SHIFT (2*BITS_PER_LONG_SHIFT) +#define MAP2_SHIFT (3*BITS_PER_LONG_SHIFT) + +#define PID_MAP_MASK (BITS_PER_LONG - 1) + +#define PID_MAP_DEPTH (ARRAY_SIZE(pid_map) - 1) + +extern unsigned long npids; + +#endif /* _LINUX_PID_H */ diff -Nru a/include/linux/sched.h b/include/linux/sched.h --- a/include/linux/sched.h Sun May 26 11:51:18 2002 +++ b/include/linux/sched.h Sun May 26 11:51:18 2002 @@ -29,6 +29,7 @@ #include #include #include +#include struct exec_domain; @@ -80,6 +81,7 @@ extern int nr_threads; extern int last_pid; extern unsigned long nr_running(void); +extern unsigned long nr_uninterruptible(void); #include #include @@ -238,6 +240,9 @@ /* Hash table maintenance information */ struct user_struct *next, **pprev; uid_t uid; + + rwlock_t lock; + list_t task_list; }; #define get_current_user() ({ \ @@ -245,6 +250,8 @@ atomic_inc(&__user->__count); \ __user; }) +extern struct user_struct *find_user(uid_t); + extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -298,14 +305,21 @@ */ struct task_struct *real_parent; /* real parent process (when being debugged) */ struct task_struct *parent; /* parent process */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct list_head thread_group; + list_t children; /* list of my children */ + list_t sibling; /* linkage in my parent's children list */ + + list_t real_children; /* list of those where I'm ->real_parent */ + list_t real_sibling; /* linkage in my parent's real_children list */ + list_t thread_group; /* PID hash table linkage. 
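The pid.h macros above are easiest to check with concrete numbers. A throwaway userspace program (not part of the patch) instantiating them for i386, where BITS_PER_LONG_SHIFT is 5 and PID_MAX is 0x8000:

    #include <stdio.h>

    #define BITS_PER_LONG_SHIFT 5
    #define PID_MAX 0x8000

    #define MAP0_SIZE (PID_MAX >> BITS_PER_LONG_SHIFT)
    #define MAP1_SIZE (MAP0_SIZE >> BITS_PER_LONG_SHIFT)
    #define MAP2_SIZE (MAP1_SIZE >> BITS_PER_LONG_SHIFT)

    int main(void)
    {
            /* 1024, 32, 1: one bit per pid at the bottom level, then
             * one summary bit per 32-bit word at each level above.
             * pid_map[] in pid.c holds the three maps plus a NULL
             * terminator, so PID_MAP_DEPTH comes out as 3. */
            printf("map0=%d map1=%d map2=%d longs\n",
                   MAP0_SIZE, MAP1_SIZE, MAP2_SIZE);
            return 0;
    }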
*/ struct task_struct *pidhash_next; struct task_struct **pidhash_pprev; + struct idtag_link idtags[IDTAG_MAX]; + + list_t user_task_list; + wait_queue_head_t wait_chldexit; /* for wait4() */ struct completion *vfork_done; /* for vfork() */ @@ -743,45 +757,66 @@ __ret; \ }) -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) +static inline void remove_parent(task_t *task) +{ + list_del_init(&task->sibling); +} -#define REMOVE_LINKS(p) do { \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ - } while (0) - -#define SET_LINKS(p) do { \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ - } while (0) +static inline void add_parent(task_t *task, task_t *parent) +{ + list_add_tail(&task->sibling, &parent->children); +} + +static inline void remove_real_parent(task_t *task) +{ + list_del_init(&task->real_sibling); +} + +static inline void add_real_parent(task_t *task, task_t *real_parent) +{ + list_add_tail(&task->real_sibling, &real_parent->real_children); +} + +static inline void REMOVE_LINKS(task_t *task) +{ + list_del_init(&task->tasks); + remove_parent(task); + remove_real_parent(task); +} + +static inline void SET_LINKS(task_t *task) +{ + list_add_tail(&task->tasks, &init_task.tasks); + add_parent(task, task->parent); + add_real_parent(task, task->real_parent); +} -static inline struct task_struct *eldest_child(struct task_struct *p) +static inline task_t *eldest_child(struct task_struct *p) { if (list_empty(&p->children)) return NULL; return list_entry(p->children.next,struct task_struct,sibling); } -static inline struct task_struct *youngest_child(struct task_struct *p) +static inline task_t *youngest_child(struct task_struct *p) { if (list_empty(&p->children)) return NULL; return list_entry(p->children.prev,struct task_struct,sibling); } -static inline struct task_struct *older_sibling(struct task_struct *p) +static inline task_t *older_sibling(struct task_struct *p) { if (p->sibling.prev==&p->parent->children) return NULL; return list_entry(p->sibling.prev,struct task_struct,sibling); } -static inline struct task_struct *younger_sibling(struct task_struct *p) +static inline task_t *younger_sibling(struct task_struct *p) { if (p->sibling.next==&p->parent->children) return NULL; return list_entry(p->sibling.next,struct task_struct,sibling); } -#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) -#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) +#define next_task(p) list_entry((p)->tasks.next, task_t, tasks) +#define prev_task(p) list_entry((p)->tasks.prev, task_t, tasks) #define for_each_task(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) diff -Nru a/init/main.c b/init/main.c --- a/init/main.c Sun May 26 11:51:18 2002 +++ b/init/main.c Sun May 26 11:51:18 2002 @@ -72,6 +72,7 @@ extern void sbus_init(void); extern void sysctl_init(void); extern void signals_init(void); +extern void idtag_init(void); extern void radix_tree_init(void); extern void free_initmem(void); @@ -387,6 +388,7 @@ #endif mem_init(); kmem_cache_sizes_init(); + idtag_init(); pgtable_cache_init(); mempages = num_physpages; diff -Nru a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile Sun May 26 11:51:18 2002 +++ b/kernel/Makefile Sun May 26 11:51:18 2002 @@ -14,8 +14,8 @@ obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o 
timer.o user.o \ - signal.o sys.o kmod.o context.o futex.o platform.o + sysctl.o capability.o ptrace.o timer.o idtag.o user.o \ + signal.o sys.o kmod.o context.o futex.o platform.o pid.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o diff -Nru a/kernel/capability.c b/kernel/capability.c --- a/kernel/capability.c Sun May 26 11:51:18 2002 +++ b/kernel/capability.c Sun May 26 11:51:18 2002 @@ -83,18 +83,25 @@ kernel_cap_t *inheritable, kernel_cap_t *permitted) { - struct task_struct *target; + task_t *target; + struct idtag *idtag; + list_t *elem; - /* FIXME: do we need to have a write lock here..? */ - read_lock(&tasklist_lock); - for_each_task(target) { - if (target->pgrp != pgrp) - continue; - target->cap_effective = *effective; - target->cap_inheritable = *inheritable; - target->cap_permitted = *permitted; - } - read_unlock(&tasklist_lock); + /* FIXME: do we need to have a write lock here..? */ + read_lock(&tasklist_lock); + idtag = find_tag(IDTAG_PGID, pgrp); + + if (!idtag) + goto out; + + list_for_each(elem, &idtag->task_list) { + target = idtag_task(elem, IDTAG_PGID); + target->cap_effective = *effective; + target->cap_inheritable = *inheritable; + target->cap_permitted = *permitted; + } +out: + read_unlock(&tasklist_lock); } /* set capabilities for all processes other than 1 and self */ diff -Nru a/kernel/exit.c b/kernel/exit.c --- a/kernel/exit.c Sun May 26 11:51:18 2002 +++ b/kernel/exit.c Sun May 26 11:51:18 2002 @@ -60,7 +60,10 @@ #ifdef CONFIG_SMP wait_task_inactive(p); #endif + write_lock(&p->user->lock); atomic_dec(&p->user->processes); + list_del(&p->user_task_list); + write_unlock(&p->user->lock); free_uid(p->user); unhash_process(p); @@ -86,23 +89,27 @@ */ int session_of_pgrp(int pgrp) { - struct task_struct *p; - int fallback; + task_t *task; + list_t *elem; + struct idtag *idtag; + int sid = -1; - fallback = -1; read_lock(&tasklist_lock); - for_each_task(p) { - if (p->session <= 0) - continue; - if (p->pgrp == pgrp) { - fallback = p->session; - break; + idtag = find_tag(IDTAG_PGID, pgrp); + if (!idtag) + goto out; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + + if (task->session > 0) { + sid = task->session; + goto out; } - if (p->pid == pgrp) - fallback = p->session; } +out: read_unlock(&tasklist_lock); - return fallback; + return sid; } /* @@ -113,45 +120,60 @@ * * "I ask you, have you ever known what it is to be an orphan?" */ -static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task) +static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) { - struct task_struct *p; + task_t *task; + struct idtag *idtag; + list_t *elem; + int ret = 1; read_lock(&tasklist_lock); - for_each_task(p) { - if ((p == ignored_task) || (p->pgrp != pgrp) || - (p->state == TASK_ZOMBIE) || - (p->parent->pid == 1)) + idtag = find_tag(IDTAG_PGID, pgrp); + if (!idtag) + goto out; + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + if (task == ignored_task + || task->state == TASK_ZOMBIE + || task->parent->pid == 1) continue; - if ((p->parent->pgrp != pgrp) && - (p->parent->session == p->session)) { - read_unlock(&tasklist_lock); - return 0; + if (task->parent->pgrp != pgrp + && task->parent->session == task->session) { + ret = 0; + goto out; } } +out: read_unlock(&tasklist_lock); - return 1; /* (sighing) "Often!" */ + return ret; /* (sighing) "Often!" 
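has_stopped_jobs(), converted just below, still walks the group's task list; changeset 1.667 above floats keeping a counter in the process group leader instead. A hedged sketch of that idea -- nothing here exists in the patch, and the structure and field are invented for illustration:

    /* Hypothetical: count stopped members at the TASK_STOPPED
     * transitions so the query becomes a single field read. */
    struct pgrp_leader_counts {
            atomic_t nr_stopped;    /* members currently in TASK_STOPPED */
    };

    static inline void note_stopped(struct pgrp_leader_counts *pg)
    {
            atomic_inc(&pg->nr_stopped);    /* task entered TASK_STOPPED */
    }

    static inline void note_continued(struct pgrp_leader_counts *pg)
    {
            atomic_dec(&pg->nr_stopped);    /* task left TASK_STOPPED */
    }

    static inline int has_stopped_jobs_fast(struct pgrp_leader_counts *pg)
    {
            return atomic_read(&pg->nr_stopped) != 0;       /* O(1) */
    }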
*/ } int is_orphaned_pgrp(int pgrp) { - return will_become_orphaned_pgrp(pgrp, 0); + return will_become_orphaned_pgrp(pgrp, NULL); } static inline int has_stopped_jobs(int pgrp) { int retval = 0; - struct task_struct * p; + task_t *task; + list_t *elem; + struct idtag *idtag; read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pgrp != pgrp) - continue; - if (p->state != TASK_STOPPED) + idtag = find_tag(IDTAG_PGID, pgrp); + if (!idtag) + goto out; + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + + if (task->state != TASK_STOPPED) continue; + retval = 1; - break; + goto out; } +out: read_unlock(&tasklist_lock); return retval; } @@ -235,11 +257,14 @@ * group, and if no such member exists, give it to * the global child reaper process (ie "init") */ -static inline void forget_original_parent(struct task_struct * father) +static inline void forget_original_parent(task_t *father) { - struct task_struct * p, *reaper; + task_t *task, *reaper; + list_t *elem; + list_t new_for_reaper = LIST_HEAD_INIT(new_for_reaper); + list_t new_for_child_reaper = LIST_HEAD_INIT(new_for_child_reaper); - read_lock(&tasklist_lock); + write_lock_irq(&tasklist_lock); /* Next in our thread group, if they're not already exiting */ reaper = father; @@ -252,22 +277,38 @@ if (reaper == father) reaper = child_reaper; - for_each_task(p) { - if (p->real_parent == father) { - /* We dont want people slaying init */ - p->exit_signal = SIGCHLD; - p->self_exec_id++; - - /* Make sure we're not reparenting to ourselves */ - if (p == reaper) - p->real_parent = child_reaper; - else - p->real_parent = reaper; + /* Make sure we're not reparenting to ourselves */ + if (reaper->real_parent == father) { + list_del(&reaper->real_sibling); + reaper->exit_signal = SIGCHLD; + reaper->self_exec_id++; + reaper->real_parent = child_reaper; + list_add(&reaper->real_sibling, &child_reaper->real_children); + if (reaper->pdeath_signal) + send_sig(reaper->pdeath_signal, reaper, 0); + } - if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0); - } + /* + * Kill off children, one by one. + * Finding an O(1) method of doing this would be nice. + */ + list_for_each(elem, &father->real_children) { + task = list_entry(elem, task_t, real_sibling); + + /* We dont want people slaying init */ + task->exit_signal = SIGCHLD; + task->self_exec_id++; + + task->real_parent = reaper; + + if (task->pdeath_signal) + send_sig(task->pdeath_signal, task, 0); } - read_unlock(&tasklist_lock); + + list_splice(&father->real_children, &reaper->real_children); + INIT_LIST_HEAD(&father->real_children); + + write_unlock_irq(&tasklist_lock); } static inline void close_files(struct files_struct * files) @@ -507,6 +548,11 @@ write_lock_irq(&tasklist_lock); } } + + detach_tag(current, IDTAG_PID); + detach_tag(current, IDTAG_PGID); + detach_tag(current, IDTAG_SID); + detach_tag(current, IDTAG_TGID); /* * No need to unlock IRQs, we'll schedule() immediately diff -Nru a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c Sun May 26 11:51:18 2002 +++ b/kernel/fork.c Sun May 26 11:51:18 2002 @@ -42,7 +42,6 @@ int max_threads; unsigned long total_forks; /* Handle normal Linux uptimes. */ -int last_pid; struct task_struct *pidhash[PIDHASH_SZ]; @@ -127,55 +126,6 @@ kmem_cache_free(task_struct_cachep,tsk); } -/* Protects next_safe and last_pid. 
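In the forget_original_parent() rewrite above, the per-child loop still fixes real_parent and delivers pdeath_signal one task at a time -- hence the in-code comment wishing for an O(1) method -- but the list transfer itself is constant-time. The idiom in isolation (the helper is hypothetical; list_splice() and INIT_LIST_HEAD() are the stock list API):

    /* Move every entry of from->real_children onto to->real_children
     * in O(1), then reinitialise the source head so list_empty()
     * checks against the dying parent remain valid. */
    static void splice_children(task_t *from, task_t *to)
    {
            list_splice(&from->real_children, &to->real_children);
            INIT_LIST_HEAD(&from->real_children);
    }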
*/ -spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED; - -static int get_pid(unsigned long flags) -{ - static int next_safe = PID_MAX; - struct task_struct *p; - int pid; - - if (flags & CLONE_IDLETASK) - return 0; - - spin_lock(&lastpid_lock); - if((++last_pid) & 0xffff8000) { - last_pid = 300; /* Skip daemons etc. */ - goto inside; - } - if(last_pid >= next_safe) { -inside: - next_safe = PID_MAX; - read_lock(&tasklist_lock); - repeat: - for_each_task(p) { - if(p->pid == last_pid || - p->pgrp == last_pid || - p->tgid == last_pid || - p->session == last_pid) { - if(++last_pid >= next_safe) { - if(last_pid & 0xffff8000) - last_pid = 300; - next_safe = PID_MAX; - } - goto repeat; - } - if(p->pid > last_pid && next_safe > p->pid) - next_safe = p->pid; - if(p->pgrp > last_pid && next_safe > p->pgrp) - next_safe = p->pgrp; - if(p->session > last_pid && next_safe > p->session) - next_safe = p->session; - } - read_unlock(&tasklist_lock); - } - pid = last_pid; - spin_unlock(&lastpid_lock); - - return pid; -} - static inline int dup_mmap(struct mm_struct * mm) { struct vm_area_struct * mpnt, *tmp, **pprev; @@ -632,8 +582,11 @@ goto bad_fork_free; } + write_lock(&p->user->lock); atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); + list_add(&p->user_task_list, &p->user->task_list); + write_unlock(&p->user->lock); /* * Counter increases are protected by @@ -660,13 +613,22 @@ p->state = TASK_UNINTERRUPTIBLE; copy_flags(clone_flags, p); - p->pid = get_pid(clone_flags); + if (clone_flags & CLONE_IDLETASK) + p->pid = 0; + else + p->pid = alloc_pid(); + attach_tag(p, IDTAG_PID, p->pid); + attach_tag(p, IDTAG_PGID, p->pgrp); + attach_tag(p, IDTAG_SID, p->session); + attach_tag(p, IDTAG_TGID, p->tgid); p->proc_dentry = NULL; INIT_LIST_HEAD(&p->run_list); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); + INIT_LIST_HEAD(&p->real_children); + INIT_LIST_HEAD(&p->real_sibling); init_waitqueue_head(&p->wait_chldexit); p->vfork_done = NULL; if (clone_flags & CLONE_VFORK) { diff -Nru a/kernel/idtag.c b/kernel/idtag.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/kernel/idtag.c Sun May 26 11:51:18 2002 @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include + +/* + * idtags for tasks + * (C) 2002 William Irwin, IBM + * idtags are backing objects for tasks sharing a given ID to chain + * against. There is very little to them aside from hashing them and + * parking tasks using given ID's on a list. + * + * TODO: use per-bucket locks. 
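The TODO above asks for per-bucket locks in place of the single idtag_lock. A hedged sketch of what that could look like -- none of this is in the patch; it merely splits the lock by the same hash the chains below already use:

    static spinlock_t idtag_bucket_locks[IDHASH_SIZE];  /* hypothetical */

    /* idtag_hashfn() depends only on the tag, not the idtag_type,
     * so one lock per index covers the chains of every type at that
     * index; idtag_unused(), which probes one tag across all types,
     * would still take a single lock. */
    static inline spinlock_t *idtag_bucket_lock(unsigned long tag)
    {
            return &idtag_bucket_locks[((tag >> 8) ^ tag) & (IDHASH_SIZE - 1)];
    }

Each lock would be spin_lock_init()ed alongside the chain heads in idtag_hash_init(), and get_tag()/put_tag() would take the bucket lock for their tag instead of idtag_lock, so lookups of unrelated IDs no longer serialize.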
+ */ + +static kmem_cache_t *idtag_cache; + +#define IDHASH_SIZE 4096 +static list_t idtag_hash[IDTAG_MAX][IDHASH_SIZE]; + +static spinlock_t idtag_lock = SPIN_LOCK_UNLOCKED; + +static inline unsigned idtag_hashfn(unsigned long tag) +{ + return ((tag >> 8) ^ tag) & (IDHASH_SIZE - 1); +} + +static inline struct idtag *idtag_alloc(void) +{ + return kmem_cache_alloc(idtag_cache, SLAB_KERNEL); +} + +static inline void idtag_free(struct idtag *idtag) +{ + kmem_cache_free(idtag_cache, idtag); +} + +static inline void init_idtag( struct idtag *idtag, + enum idtag_type type, + unsigned long tag) +{ + INIT_LIST_HEAD(&idtag->task_list); + atomic_set(&idtag->count, 0); + idtag->type = type; + idtag->tag = tag; + list_add(&idtag->idtag_hash_chain, + &idtag_hash[type][idtag_hashfn(tag)]); +} + +struct idtag *find_tag(enum idtag_type type, unsigned long tag) +{ + list_t *bucket, *elem; + struct idtag *idtag; + + bucket = &idtag_hash[type][idtag_hashfn(tag)]; + + list_for_each(elem, bucket) { + idtag = list_entry(elem, struct idtag, idtag_hash_chain); + if (idtag->tag == tag) + return idtag; + } + return NULL; +} + +int idtag_unused(unsigned long tag) +{ + enum idtag_type type; + + for (type = 0; type < IDTAG_MAX; ++type) + if (find_tag(type, tag)) + return 0; + + return 1; +} + +void __init idtag_hash_init(void) +{ + int i, j; + + for (i = 0; i < IDTAG_MAX; ++i) + for (j = 0; j < IDHASH_SIZE; ++j) + INIT_LIST_HEAD(&idtag_hash[i][j]); +} + +void __init idtag_init(void) +{ + idtag_cache = kmem_cache_create("idtag_cache", + sizeof(struct idtag), + 0, + SLAB_HWCACHE_ALIGN, + NULL, + NULL); +} + +struct idtag *get_tag(enum idtag_type type, unsigned long tag) +{ + struct idtag *idtag; + + spin_lock(&idtag_lock); + idtag = find_tag(type, tag); + + if (!idtag) { + struct idtag *raced_tag; + spin_unlock(&idtag_lock); + idtag = idtag_alloc(); + spin_lock(&idtag_lock); + raced_tag = find_tag(type, tag); + if (!raced_tag) { + init_idtag(idtag, type, tag); + goto out_inc; + } + idtag_free(idtag); + idtag = raced_tag; + } + + if (!idtag) + goto out; +out_inc: + atomic_inc(&idtag->count); +out: + spin_unlock(&idtag_lock); + return idtag; +} + +void put_tag(struct idtag *idtag) +{ + unsigned long tag; + if (!atomic_dec_and_lock(&idtag->count, &idtag_lock)) + return; + + tag = idtag->tag; + list_del(&idtag->idtag_hash_chain); + idtag_free(idtag); + if (idtag_unused(tag)) + free_pid(tag); + spin_unlock(&idtag_lock); +} + +int attach_tag(task_t *task, enum idtag_type type, unsigned long tag) +{ + struct idtag *idtag; + + idtag = get_tag(type, tag); + + if (!idtag) + return -ENOMEM; + + write_lock(&tasklist_lock); + list_add(&task->idtags[type].idtag_chain, &idtag->task_list); + task->idtags[type].tag = tag; + task->idtags[type].idtag = idtag; + write_unlock(&tasklist_lock); + + return 0; +} + +int detach_tag(task_t *task, enum idtag_type type) +{ + write_lock(&tasklist_lock); + list_del(&task->idtags[type].idtag_chain); + write_unlock(&tasklist_lock); + put_tag(task->idtags[type].idtag); + return 0; +} diff -Nru a/kernel/kmod.c b/kernel/kmod.c --- a/kernel/kmod.c Sun May 26 11:51:18 2002 +++ b/kernel/kmod.c Sun May 26 11:51:18 2002 @@ -123,11 +123,19 @@ /* Drop the "current user" thing */ { struct user_struct *user = curtask->user; + + write_lock(&user->lock); + atomic_dec(&user->processes); + list_del(&curtask->user_task_list); + write_unlock(&user->lock); + free_uid(user); + + write_lock(&INIT_USER->lock); curtask->user = INIT_USER; atomic_inc(&INIT_USER->__count); atomic_inc(&INIT_USER->processes); - 
atomic_dec(&user->processes); - free_uid(user); + list_add(&curtask->user_task_list, &INIT_USER->task_list); + write_unlock(&INIT_USER->lock); } /* Give kmod all effective privileges.. */ diff -Nru a/kernel/pid.c b/kernel/pid.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/kernel/pid.c Sun May 26 11:51:18 2002 @@ -0,0 +1,195 @@ +#include +#include +#include +#include +#include + +/* + * pid allocator + * (C) 2002 William Irwin, IBM + * + * The strategy is to maintain a tower of bitmaps where a bitmap above + * another in each bit accounts whether any pid's are available in the + * space tracked by BITS_PER_LONG bits of the level below. The bitmaps + * must be marked on allocation and also release, hence some + * infrastructure for detecting when the last user of a pid releases it + * must be in place. + */ + +static unsigned long pid_map0[MAP0_SIZE]; +static unsigned long pid_map1[MAP1_SIZE]; +static unsigned long pid_map2[MAP2_SIZE]; + +static unsigned long * pid_map[] = { pid_map0, pid_map1, pid_map2, NULL, }; + +unsigned long last_pid = 0; +unsigned long npids = 0; + +static const int map_shifts[] = + { 0, + BITS_PER_LONG_SHIFT, + BITS_PER_LONG_SHIFT*2, + BITS_PER_LONG_SHIFT*3, + BITS_PER_LONG_SHIFT*4, + }; + +static inline int pid_map_shift(int depth) +{ + return map_shifts[depth+1]; +} + +static spinlock_t pid_lock = SPIN_LOCK_UNLOCKED; + +void free_pid(unsigned long pid) +{ + unsigned long **map = pid_map; + + spin_lock(&pid_lock); + while (*map) { + int bit = pid & PID_MAP_MASK; + pid >>= BITS_PER_LONG_SHIFT; + __clear_bit(bit, &(*map)[pid]); + ++map; + } + --npids; + spin_unlock(&pid_lock); +} + +static inline int whole_block_used(int level, unsigned long pid) +{ + return pid_map[level][pid >> pid_map_shift(level)] == ~0UL; +} + +static inline void mark_pid(unsigned long pid) +{ + int level; + for (level = 0; level < PID_MAP_DEPTH; ++level) { + int shift, bit; + unsigned long entry; + + shift = pid_map_shift(level); + entry = pid >> shift; + bit = (pid >> (shift - BITS_PER_LONG_SHIFT)) & PID_MAP_MASK; + if (level == 0 || whole_block_used(level - 1, pid)) + __set_bit(bit, &pid_map[level][entry]); + else + break; + } + ++npids; +} + +static inline int pid_map_limit(int depth) +{ + return PID_MAX >> pid_map_shift(depth); +} + +#ifdef PID_ALLOC_EXAMPLE +/* + * the pid allocation traverses the bitmaps by recursively ffz'ing + * through down the tower of maps. Some additional logic is required + * to enforce lower limits, but the following example of how one + * would perform this search without the lower limit may well prove + * enlightening for those interested in the mechanics of the algorithm. 
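To complement the in-tree example that follows, here is a throwaway userspace model of the bare search -- two levels, 32 x 32 = 1024 pids, uint32_t words, no lower limit and no locking; purely illustrative, not the kernel code:

    #include <stdint.h>
    #include <stdio.h>

    #define SHIFT 5

    static uint32_t map0[32];       /* one bit per pid: 32*32 = 1024 pids */
    static uint32_t map1;           /* one summary bit per map0 word */

    static unsigned ffz32(uint32_t w)       /* find first zero bit */
    {
            unsigned bit = 0;
            while (w & 1) { w >>= 1; bit++; }
            return bit;
    }

    static long alloc(void)
    {
            unsigned block, bit;

            if (map1 == 0xffffffffu)
                    return -1;              /* all 1024 pids in use */
            block = ffz32(map1);            /* a word with a free pid */
            bit   = ffz32(map0[block]);     /* the free pid inside it */

            map0[block] |= 1u << bit;       /* mark it used... */
            if (map0[block] == 0xffffffffu)
                    map1 |= 1u << block;    /* ...propagating "full" up */
            return (long)((block << SHIFT) + bit);
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 40; i++)        /* prints 0 1 2 ... 39 */
                    printf("%ld ", alloc());
            printf("\n");
            return 0;
    }

Freeing would clear the pid's bit and the summary bits above it, which is exactly what free_pid() above does, level by level.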
+ */ +static long alloc_pid_from_zero(void) +{ + unsigned long pid = 0; + int level; + + for (level = PID_MAP_DEPTH - 1; level >= 0; --level) { + unsigned long entry = pid_map[level][pid]; + + if (unlikely(entry == ~0UL)) + return ~0UL; + + pid = (pid << BITS_PER_LONG_SHIFT) + ffz(pid_map[level][pid]); + } + return pid; +} +#endif /* PID_ALLOC_EXAMPLE */ + + +static const unsigned long pid_max_levels[] = + { PID_MAX >> BITS_PER_LONG_SHIFT, + PID_MAX >> (BITS_PER_LONG_SHIFT*2), + PID_MAX >> (BITS_PER_LONG_SHIFT*3), + PID_MAX >> (BITS_PER_LONG_SHIFT*4), + }; + +static long alloc_pid_after(unsigned long limit) +{ + unsigned long pid = 0; + unsigned long lower_limit = limit; + int level; + + level = PID_MAP_DEPTH - 1; + while (level >= 0 && level < PID_MAP_DEPTH) { + unsigned long entry = 0; + unsigned long offset; + unsigned long shifted_limit; + + if (unlikely(pid >= pid_max_levels[level])) + return ~0UL; + + shifted_limit = lower_limit >> pid_map_shift(level); + + /* + * This test checks to see whether some bits of the + * entry we're examining need to be masked off in + * order to enforce the lower limit. + */ + if (likely(pid - shifted_limit >= BITS_PER_LONG)) { + + offset = (lower_limit >> pid_map_shift(level-1)); + entry |= (1UL << (offset & PID_MAP_MASK)) - 1; + pid = shifted_limit; + } + + entry |= pid_map[level][pid]; + + /* + * The lower limit enforcement may well have masked off + * bits otherwise available, and forced us to ascend. + * This feeds the address calculations the appropriate + * values so they'll retry the bucket after; this can + * only occur a bounded number of times, as this increment + * aligns it on boundaries preventing it from recurring. + */ + if (unlikely(entry == ~0UL)) { + pid >>= BITS_PER_LONG_SHIFT; + lower_limit >>= pid_map_shift(level); + ++lower_limit; + lower_limit <<= pid_map_shift(level); + ++level; + continue; + } + + pid = (pid << BITS_PER_LONG_SHIFT) + ffz(entry); + --level; + } + return pid ? 
pid : ~0UL; + +} + +int alloc_pid(void) +{ + unsigned long pid; + + spin_lock(&pid_lock); + pid = alloc_pid_after(last_pid); + if (unlikely(pid == ~0UL)) { + pid = alloc_pid_after(RESERVED_PIDS); + if (unlikely(pid == ~0UL)) + goto out; + } + last_pid = pid; + mark_pid(pid); +out: + spin_unlock(&pid_lock); + return (int)pid; +} + +void __init pid_init(void) +{ + mark_pid(0); +} diff -Nru a/kernel/sched.c b/kernel/sched.c --- a/kernel/sched.c Sun May 26 11:51:18 2002 +++ b/kernel/sched.c Sun May 26 11:51:18 2002 @@ -133,6 +133,7 @@ spinlock_t lock; spinlock_t frozen; unsigned long nr_running, nr_switches, expired_timestamp; + signed long nr_uninterruptible; task_t *curr, *idle; prio_array_t *active, *expired, arrays[2]; int prev_nr_running[NR_CPUS]; @@ -240,6 +241,8 @@ static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; + if (p->state == TASK_UNINTERRUPTIBLE) + rq->nr_uninterruptible++; dequeue_task(p, p->array); p->array = NULL; } @@ -319,11 +322,16 @@ { unsigned long flags; int success = 0; + int uninterruptible = 0; runqueue_t *rq; rq = task_rq_lock(p, &flags); + if (p->state == TASK_UNINTERRUPTIBLE) + uninterruptible = 1; p->state = TASK_RUNNING; if (!p->array) { + if (uninterruptible) + rq->nr_uninterruptible--; activate_task(p, rq); if (p->prio < rq->curr->prio) resched_task(rq->curr); @@ -429,6 +437,16 @@ return sum; } +unsigned long nr_uninterruptible(void) +{ + unsigned long i, sum = 0; + + for (i = 0; i < smp_num_cpus; i++) + sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible; + + return sum; +} + unsigned long nr_context_switches(void) { unsigned long i, sum = 0; @@ -1584,6 +1602,8 @@ { runqueue_t *rq; int i, j, k; + extern void idtag_hash_init(void); + extern void pid_init(void); for (i = 0; i < NR_CPUS; i++) { runqueue_t *rq = cpu_rq(i); @@ -1624,6 +1644,9 @@ */ atomic_inc(&init_mm.mm_count); enter_lazy_tlb(&init_mm, current, smp_processor_id()); + + idtag_hash_init(); + pid_init(); } #if CONFIG_SMP diff -Nru a/kernel/signal.c b/kernel/signal.c --- a/kernel/signal.c Sun May 26 11:51:18 2002 +++ b/kernel/signal.c Sun May 26 11:51:18 2002 @@ -612,21 +612,35 @@ int kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) { - int retval = -EINVAL; - if (pgrp > 0) { - struct task_struct *p; - - retval = -ESRCH; - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pgrp == pgrp && thread_group_leader(p)) { - int err = send_sig_info(sig, info, p); - if (retval) - retval = err; - } - } - read_unlock(&tasklist_lock); + task_t *task; + list_t *elem; + struct idtag *idtag; + int err, retval = -EINVAL; + + if (pgrp <= 0) + goto out; + + retval = -ESRCH; + + read_lock(&tasklist_lock); + idtag = find_tag(IDTAG_PGID, pgrp); + + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + + if (!thread_group_leader(task)) + continue; + + err = send_sig_info(sig, info, task); + if (retval) + retval = err; } +out_unlock: + read_unlock(&tasklist_lock); +out: return retval; } @@ -637,23 +651,33 @@ */ int -kill_sl_info(int sig, struct siginfo *info, pid_t sess) +kill_sl_info(int sig, struct siginfo *info, pid_t sid) { - int retval = -EINVAL; - if (sess > 0) { - struct task_struct *p; - - retval = -ESRCH; - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->leader && p->session == sess) { - int err = send_sig_info(sig, info, p); - if (retval) - retval = err; - } - } - read_unlock(&tasklist_lock); + int err, retval = -EINVAL; + struct idtag *idtag; + list_t *elem; + task_t *task; + + if (sid 
<= 0) + goto out; + + retval = -ESRCH; + read_lock(&tasklist_lock); + idtag = find_tag(IDTAG_SID, sid); + if (!idtag) + goto out_unlock; + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_SID); + if (!task->leader) + continue; + + err = send_sig_info(sig, info, task); + if (retval) + retval = err; } +out_unlock: + read_unlock(&tasklist_lock); +out: return retval; } @@ -661,20 +685,24 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; - struct task_struct *p; + task_t *task; + task_t *tgrp_leader; read_lock(&tasklist_lock); - p = find_task_by_pid(pid); + task = find_task_by_pid(pid); error = -ESRCH; - if (p) { - if (!thread_group_leader(p)) { - struct task_struct *tg; - tg = find_task_by_pid(p->tgid); - if (tg) - p = tg; - } - error = send_sig_info(sig, info, p); - } + if (!task) + goto out_unlock; + + if (thread_group_leader(task)) + goto out_send_sig; + + tgrp_leader = find_task_by_pid(task->tgid); + if (tgrp_leader) + task = tgrp_leader; +out_send_sig: + error = send_sig_info(sig, info, task); +out_unlock: read_unlock(&tasklist_lock); return error; } diff -Nru a/kernel/sys.c b/kernel/sys.c --- a/kernel/sys.c Sun May 26 11:51:18 2002 +++ b/kernel/sys.c Sun May 26 11:51:18 2002 @@ -193,35 +193,35 @@ cond_syscall(sys_quotactl) cond_syscall(sys_acct) -static int proc_sel(struct task_struct *p, int which, int who) + +static int set_one_prio(task_t *task, int niceval, int error) { - if(p->pid) - { - switch (which) { - case PRIO_PROCESS: - if (!who && p == current) - return 1; - return(p->pid == who); - case PRIO_PGRP: - if (!who) - who = current->pgrp; - return(p->pgrp == who); - case PRIO_USER: - if (!who) - who = current->uid; - return(p->uid == who); - } + if (task->uid != current->euid && + task->uid != current->uid && !capable(CAP_SYS_NICE)) { + error = -EPERM; + goto out; } - return 0; + + if (error == -ESRCH) + error = 0; + if (niceval < task_nice(task) && !capable(CAP_SYS_NICE)) + error = -EACCES; + else + set_user_nice(task, niceval); +out: + return error; } asmlinkage long sys_setpriority(int which, int who, int niceval) { - struct task_struct *p; - int error; + task_t *task; + struct user_struct *user; + struct idtag *idtag; + list_t *elem; + int error = -EINVAL; if (which > 2 || which < 0) - return -EINVAL; + goto out; /* normalize: avoid signed division (rounding problems) */ error = -ESRCH; @@ -231,23 +231,51 @@ niceval = 19; read_lock(&tasklist_lock); - for_each_task(p) { - if (!proc_sel(p, which, who)) - continue; - if (p->uid != current->euid && - p->uid != current->uid && !capable(CAP_SYS_NICE)) { - error = -EPERM; - continue; - } - if (error == -ESRCH) - error = 0; - if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) - error = -EACCES; - else - set_user_nice(p, niceval); + switch (which) { + case PRIO_PROCESS: + if (!who) + who = current->pid; + idtag = find_tag(IDTAG_PID, who); + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PID); + error = set_one_prio(task, niceval, error); + } + break; + case PRIO_PGRP: + if (!who) + who = current->pgrp; + idtag = find_tag(IDTAG_PGID, who); + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + error = set_one_prio(task, niceval, error); + } + break; + case PRIO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + goto out_unlock; + + read_lock(&user->lock); + list_for_each(elem, &user->task_list) { + task = list_entry(elem, 
task_t, user_task_list); + error = set_one_prio(task, niceval, error); + } + read_unlock(&user->lock); + break; } +out_unlock: read_unlock(&tasklist_lock); - +out: return error; } @@ -259,21 +287,65 @@ */ asmlinkage long sys_getpriority(int which, int who) { - struct task_struct *p; - long retval = -ESRCH; + task_t *task; + list_t *elem; + struct idtag *idtag; + struct user_struct *user; + long niceval, retval = -ESRCH; if (which > 2 || which < 0) return -EINVAL; read_lock(&tasklist_lock); - for_each_task (p) { - long niceval; - if (!proc_sel(p, which, who)) - continue; - niceval = 20 - task_nice(p); - if (niceval > retval) - retval = niceval; + switch (which) { + case PRIO_PROCESS: + if (!who) + who = current->pid; + idtag = find_tag(IDTAG_PID, who); + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PID); + niceval = 20 - task_nice(task); + if (niceval > retval) + retval = niceval; + } + break; + case PRIO_PGRP: + if (!who) + who = current->pgrp; + idtag = find_tag(IDTAG_PGID, who); + if (!idtag) + goto out_unlock; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + niceval = 20 - task_nice(task); + if (niceval > retval) + retval = niceval; + } + break; + case PRIO_USER: + if (!who) + user = current->user; + else + user = find_user(who); + + if (!user) + goto out_unlock; + + read_lock(&user->lock); + list_for_each(elem, &user->task_list) { + task = list_entry(elem, task_t, user_task_list); + niceval = 20 - task_nice(task); + if (niceval > retval) + retval = niceval; + } + read_unlock(&user->lock); + break; } +out_unlock: read_unlock(&tasklist_lock); return retval; @@ -538,16 +610,24 @@ if (!new_user) return -EAGAIN; old_user = current->user; + + write_lock(&old_user->lock); atomic_dec(&old_user->processes); + list_del(¤t->user_task_list); + write_unlock(&old_user->lock); + + write_lock(&new_user->lock); atomic_inc(&new_user->processes); + list_add(¤t->user_task_list, &new_user->task_list); + current->uid = new_ruid; + current->user = new_user; + write_unlock(&new_user->lock); if(dumpclear) { current->mm->dumpable = 0; wmb(); } - current->uid = new_ruid; - current->user = new_user; free_uid(old_user); return 0; } @@ -853,7 +933,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { - struct task_struct * p; + task_t *p; int err = -EINVAL; if (!pid) @@ -886,17 +966,27 @@ if (p->leader) goto out; if (pgid != pid) { - struct task_struct * tmp; - for_each_task (tmp) { - if (tmp->pgrp == pgid && - tmp->session == current->session) + task_t * task; + struct idtag *idtag; + list_t *elem; + + idtag = find_tag(IDTAG_PGID, pgid); + if (!idtag) + goto out; + + list_for_each(elem, &idtag->task_list) { + task = idtag_task(elem, IDTAG_PGID); + + if (task->session == current->session) goto ok_pgid; } goto out; } ok_pgid: + detach_tag(p, IDTAG_PGID); p->pgrp = pgid; + attach_tag(p, IDTAG_PGID, pgid); err = 0; out: /* All paths lead to here, thus we are safe. 
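The ok_pgid path above introduces the re-tagging idiom this patch uses whenever a published ID changes: detach the old tag, update the task field, attach the new one. Factored into a hypothetical helper (locking elided; see the idtag.h comments earlier for the lock rules, and note that attach_tag() can fail):

    /* Hypothetical: move a task from one process group tag to
     * another, keeping task->pgrp and the tag hash in step. */
    static int change_pgrp(task_t *p, pid_t pgid)
    {
            detach_tag(p, IDTAG_PGID);
            p->pgrp = pgid;
            return attach_tag(p, IDTAG_PGID, pgid);
    }

sys_setsid() in the next hunk does the same dance twice, once for IDTAG_PGID and once for IDTAG_SID.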
-DaveM */ @@ -950,17 +1040,22 @@ asmlinkage long sys_setsid(void) { - struct task_struct * p; + struct idtag *idtag; int err = -EPERM; read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pgrp == current->pid) - goto out; - } + + idtag = find_tag(IDTAG_PGID, current->pid); + + if (idtag) + goto out; current->leader = 1; + detach_tag(current, IDTAG_PGID); + detach_tag(current, IDTAG_SID); current->session = current->pgrp = current->pid; + attach_tag(current, IDTAG_PGID, current->pid); + attach_tag(current, IDTAG_SID, current->pid); current->tty = NULL; current->tty_old_pgrp = 0; err = current->pgrp; diff -Nru a/kernel/timer.c b/kernel/timer.c --- a/kernel/timer.c Sun May 26 11:51:18 2002 +++ b/kernel/timer.c Sun May 26 11:51:18 2002 @@ -597,17 +597,7 @@ */ static unsigned long count_active_tasks(void) { - struct task_struct *p; - unsigned long nr = 0; - - read_lock(&tasklist_lock); - for_each_task(p) { - if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE))) - nr += FIXED_1; - } - read_unlock(&tasklist_lock); - return nr; + return (nr_running() + nr_uninterruptible()) * FIXED_1; } /* diff -Nru a/kernel/user.c b/kernel/user.c --- a/kernel/user.c Sun May 26 11:51:18 2002 +++ b/kernel/user.c Sun May 26 11:51:18 2002 @@ -29,6 +29,8 @@ struct user_struct root_user = { __count: ATOMIC_INIT(1), processes: ATOMIC_INIT(1), + lock: RW_LOCK_UNLOCKED, + task_list: LIST_HEAD_INIT(root_user.task_list), files: ATOMIC_INIT(0) }; @@ -73,6 +75,11 @@ } } +struct user_struct *find_user(uid_t uid) +{ + return uid_hash_find(uid, uidhashentry(uid)); +} + void free_uid(struct user_struct *up) { if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) { @@ -101,6 +108,8 @@ atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); + INIT_LIST_HEAD(&new->task_list); + rwlock_init(&new->lock); /* * Before adding this, check whether we raced
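With the timer.c hunk above, count_active_tasks() reduces to (nr_running() + nr_uninterruptible()) * FIXED_1, so the five-second loadavg tick no longer takes tasklist_lock or walks the task list at all. A userspace sketch of the fixed-point averaging that this count feeds; FSHIFT, FIXED_1 and EXP_1 are assumed from this era's <linux/sched.h> (11-bit fixed point, 1-minute decay), so verify them before leaning on the exact values:

    #include <stdio.h>

    #define FSHIFT  11
    #define FIXED_1 (1 << FSHIFT)   /* 1.0 in fixed point */
    #define EXP_1   1884            /* 1/exp(5sec/1min), fixed point */

    #define CALC_LOAD(load, exp, n) \
            load *= exp; \
            load += n * (FIXED_1 - exp); \
            load >>= FSHIFT;

    int main(void)
    {
            unsigned long avenrun = 0;
            unsigned long active = 3 * FIXED_1;     /* 3 runnable/D tasks */
            int tick;

            /* ten minutes of 5-second ticks: the average converges on
             * ~3.00 (integer truncation can leave it at 2.99) */
            for (tick = 0; tick < 120; tick++) {
                    CALC_LOAD(avenrun, EXP_1, active);
            }
            printf("load: %lu.%02lu\n", avenrun >> FSHIFT,
                   (avenrun & (FIXED_1 - 1)) * 100 / FIXED_1);
            return 0;
    }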