From: Mikael Pettersson This patch eliminates the illegal task_lock() perfctr's inheritance feature introduced in release_task(). - Changed __vperfctr_release() to use schedule_work() to do the task_lock(parent) etc in a different thread's context. This is because release_task() has a write lock on the task list lock, and task_lock() is forbidden in that case. When current == parent, this is bypassed and the merge work is done immediately without taking task_lock(). Added children_lock to struct vperfctr, to synchronise accesses (release/update_control/read) to the children array. Signed-off-by: Mikael Pettersson Signed-off-by: Andrew Morton --- 25-akpm/Documentation/perfctr/virtual.txt | 12 +-- 25-akpm/drivers/perfctr/version.h | 2 25-akpm/drivers/perfctr/virtual.c | 97 ++++++++++++++++++++---------- 3 files changed, 70 insertions(+), 41 deletions(-) diff -puN Documentation/perfctr/virtual.txt~perfctr-inheritance-locking-fix Documentation/perfctr/virtual.txt --- 25/Documentation/perfctr/virtual.txt~perfctr-inheritance-locking-fix Fri Nov 12 16:19:56 2004 +++ 25-akpm/Documentation/perfctr/virtual.txt Fri Nov 12 16:19:57 2004 @@ -1,4 +1,4 @@ -$Id: perfctr-documentation-update.patch,v 1.1 2004/07/12 05:41:57 akpm Exp $ +$Id: virtual.txt,v 1.3 2004/08/09 09:42:22 mikpe Exp $ VIRTUAL PER-PROCESS PERFORMANCE COUNTERS ======================================== @@ -140,13 +140,9 @@ There are five types of accesses to a th before accessing the perfctr pointer. 5. release_task(). - While reaping a child, the kernel only takes the tasklist_lock to - prevent the parent from changing or disappearing. This does not - prevent the parent's perfctr state pointer from changing. Concurrent - accesses to the parent's "children counts" state are also possible. - - To avoid these issues, perfctr_release_task() performs a task_lock() - on the parent. + Reaping a child may or may not be done by the parent of that child. + When done by the parent, no lock is taken. Otherwise, a task_lock() + on the parent is done before accessing its thread's perfctr pointer. The Pseudo File System ---------------------- diff -puN drivers/perfctr/version.h~perfctr-inheritance-locking-fix drivers/perfctr/version.h --- 25/drivers/perfctr/version.h~perfctr-inheritance-locking-fix Fri Nov 12 16:19:56 2004 +++ 25-akpm/drivers/perfctr/version.h Fri Nov 12 16:19:57 2004 @@ -1 +1 @@ -#define VERSION "2.7.4" +#define VERSION "2.7.5" diff -puN drivers/perfctr/virtual.c~perfctr-inheritance-locking-fix drivers/perfctr/virtual.c --- 25/drivers/perfctr/virtual.c~perfctr-inheritance-locking-fix Fri Nov 12 16:19:56 2004 +++ 25-akpm/drivers/perfctr/virtual.c Fri Nov 12 16:19:57 2004 @@ -1,4 +1,4 @@ -/* $Id: virtual.c,v 1.95 2004/05/31 20:36:37 mikpe Exp $ +/* $Id: virtual.c,v 1.104 2004/08/09 09:42:22 mikpe Exp $ * Virtual per-process performance counters. * * Copyright (C) 1999-2004 Mikael Pettersson @@ -42,10 +42,15 @@ struct vperfctr { #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT unsigned int iresume_cstatus; #endif - /* protected by task_lock(owner) */ + /* children_lock protects inheritance_id and children, + when parent is not the one doing release_task() */ + spinlock_t children_lock; unsigned long long inheritance_id; struct perfctr_sum_ctrs children; - struct work_struct free_work; + /* schedule_work() data for when an operation cannot be + done in the current context due to locking rules */ + struct work_struct work; + struct task_struct *parent_tsk; }; #define IS_RUNNING(perfctr) perfctr_cstatus_enabled((perfctr)->cpu_state.cstatus) @@ -172,8 +177,8 @@ static void schedule_put_vperfctr(struct { if (!atomic_dec_and_test(&perfctr->count)) return; - INIT_WORK(&perfctr->free_work, scheduled_vperfctr_free, perfctr); - schedule_work(&perfctr->free_work); + INIT_WORK(&perfctr->work, scheduled_vperfctr_free, perfctr); + schedule_work(&perfctr->work); } static unsigned long long new_inheritance_id(void) @@ -396,39 +401,67 @@ void __vperfctr_exit(struct vperfctr *pe * A task is being released. If it inherited its perfctr settings * from its parent, then merge its final counts back into the parent. * Then unlink the child's perfctr. - * PRE: caller holds tasklist_lock. + * PRE: caller has write_lock_irq(&tasklist_lock). * PREEMPT note: preemption is disabled due to tasklist_lock. + * + * When current == parent_tsk, the child's counts can be merged + * into the parent's immediately. This is the common case. + * + * When current != parent_tsk, the parent must be task_lock()ed + * before its perfctr state can be accessed. task_lock() is illegal + * here due to the write_lock_irq(&tasklist_lock) in release_task(), + * so the operation is done via schedule_work(). */ -void __vperfctr_release(struct task_struct *child_tsk) +static void do_vperfctr_release(struct vperfctr *child_perfctr, struct task_struct *parent_tsk) { - struct task_struct *parent_tsk; struct vperfctr *parent_perfctr; - struct vperfctr *child_perfctr; unsigned int cstatus, nrctrs, i; - /* Our caller holds tasklist_lock, protecting child_tsk->parent. - We must also task_lock(child_tsk->parent), to prevent its - ->thread.perfctr from changing. */ - parent_tsk = child_tsk->parent; - task_lock(parent_tsk); parent_perfctr = parent_tsk->thread.perfctr; - child_perfctr = child_tsk->thread.perfctr; - if (parent_perfctr && child_perfctr && - parent_perfctr->inheritance_id == child_perfctr->inheritance_id) { - cstatus = parent_perfctr->cpu_state.cstatus; - if (perfctr_cstatus_has_tsc(cstatus)) - parent_perfctr->children.tsc += - child_perfctr->cpu_state.tsc_sum + - child_perfctr->children.tsc; - nrctrs = perfctr_cstatus_nrctrs(cstatus); - for(i = 0; i < nrctrs; ++i) - parent_perfctr->children.pmc[i] += - child_perfctr->cpu_state.pmc[i].sum + - child_perfctr->children.pmc[i]; + if (parent_perfctr && child_perfctr) { + spin_lock(&parent_perfctr->children_lock); + if (parent_perfctr->inheritance_id == child_perfctr->inheritance_id) { + cstatus = parent_perfctr->cpu_state.cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + parent_perfctr->children.tsc += + child_perfctr->cpu_state.tsc_sum + + child_perfctr->children.tsc; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = 0; i < nrctrs; ++i) + parent_perfctr->children.pmc[i] += + child_perfctr->cpu_state.pmc[i].sum + + child_perfctr->children.pmc[i]; + } + spin_unlock(&parent_perfctr->children_lock); } + schedule_put_vperfctr(child_perfctr); +} + +static void scheduled_release(void *data) +{ + struct vperfctr *child_perfctr = data; + struct task_struct *parent_tsk = child_perfctr->parent_tsk; + + task_lock(parent_tsk); + do_vperfctr_release(child_perfctr, parent_tsk); task_unlock(parent_tsk); + put_task_struct(parent_tsk); +} + +void __vperfctr_release(struct task_struct *child_tsk) +{ + struct task_struct *parent_tsk = child_tsk->parent; + struct vperfctr *child_perfctr = child_tsk->thread.perfctr; + child_tsk->thread.perfctr = NULL; - schedule_put_vperfctr(child_perfctr); + if (parent_tsk == current) + do_vperfctr_release(child_perfctr, parent_tsk); + else { + get_task_struct(parent_tsk); + INIT_WORK(&child_perfctr->work, scheduled_release, child_perfctr); + child_perfctr->parent_tsk = parent_tsk; + schedule_work(&child_perfctr->work); + } } /* schedule() --> switch_to() --> .. --> __vperfctr_suspend(). @@ -574,10 +607,10 @@ static int do_vperfctr_control(struct vp if (!(control->preserve & (1<cpu_state.pmc[i].sum = 0; - task_lock(tsk); + spin_lock(&perfctr->children_lock); perfctr->inheritance_id = new_inheritance_id(); memset(&perfctr->children, 0, sizeof perfctr->children); - task_unlock(tsk); + spin_unlock(&perfctr->children_lock); if (tsk == current) vperfctr_resume(perfctr); @@ -678,10 +711,10 @@ static int do_vperfctr_read(struct vperf } if (childrenp) { if (tsk) - task_lock(tsk); + spin_lock(&perfctr->children_lock); tmp->children = perfctr->children; if (tsk) - task_unlock(tsk); + spin_unlock(&perfctr->children_lock); } if (tsk == current) preempt_enable(); _