From: Christoph Lameter Avoid taking the tasklist_lock in sys_times if the process is single threaded. In a NUMA system taking the tasklist_lock may cause a bouncing cacheline if multiple independent processes continually call sys_times to measure their performance. Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton --- kernel/sys.c | 84 +++++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 59 insertions(+), 25 deletions(-) diff -puN kernel/sys.c~optimize-sys_times-for-a-single-thread-process kernel/sys.c --- 25/kernel/sys.c~optimize-sys_times-for-a-single-thread-process Wed May 18 14:10:47 2005 +++ 25-akpm/kernel/sys.c Wed May 18 14:10:47 2005 @@ -897,35 +897,69 @@ asmlinkage long sys_times(struct tms __u */ if (tbuf) { struct tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; cputime_t utime, stime, cutime, cstime; - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); +#ifdef CONFIG_SMP + if (current == next_thread(current)) { + /* + * Single thread case without the use of any locks. + * + * We may race with release_task if two threads are + * executing. However, release task first adds up the + * counters (__exit_signal) before removing the task + * from the process tasklist (__unhash_process). + * __exit_signal also acquires and releases the + * siglock which results in the proper memory ordering + * so that the list modifications are always visible + * after the counters have been updated. + * + * If the counters have been updated by the second thread + * but the thread has not yet been removed from the list + * then the other branch will be executing which will + * block on tasklist_lock until the exit handling of the + * other task is finished. + * + * This also implies that the sighand->siglock cannot + * be held by another processor. So we can also + * skip acquiring that lock. + */ + utime = cputime_add(current->signal->utime, current->utime); + stime = cputime_add(current->signal->utime, current->stime); + cutime = current->signal->cutime; + cstime = current->signal->cstime; + } else +#endif + { - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); + /* Process with multiple threads */ + struct task_struct *tsk = current; + struct task_struct *t; + read_lock(&tasklist_lock); + utime = tsk->signal->utime; + stime = tsk->signal->stime; + t = tsk; + do { + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != tsk); + + /* + * While we have tasklist_lock read-locked, no dying thread + * can be updating current->signal->[us]time. Instead, + * we got their counts included in the live thread loop. + * However, another thread can come in right now and + * do a wait call that updates current->signal->c[us]time. + * To make sure we always see that pair updated atomically, + * we take the siglock around fetching them. + */ + spin_lock_irq(&tsk->sighand->siglock); + cutime = tsk->signal->cutime; + cstime = tsk->signal->cstime; + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + } tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_cutime = cputime_to_clock_t(cutime); _