From: Ingo Molnar

What I'd propose is the attached patch instead (tested against 2.6.0-test2).
It unifies the functionality of add_timer() and mod_timer(), and makes any
combination of the timer API calls completely SMP-safe. del_timer() still
does not use the timer lock.

This patch fixes the only timer bug in 2.6 I'm aware of: the
del_timer_sync() + add_timer() combination in kernel/itimer.c is buggy.
This was correct code in 2.4, because there it was safe to do an
add_timer() from the timer handler itself, in parallel with a
del_timer_sync(). If we want to make this safe in 2.6 too (which I think
we do), then we have to make add_timer() almost equivalent to mod_timer(),
locking-wise.

And once we are at that point, I think it's much cleaner to actually make
add_timer() a variant of mod_timer(). (There is no locking cost for
add_timer(), only the cost of an extra branch, and we've removed another
commonly used function from the icache.)

 25-akpm/include/linux/timer.h |   23 ++++++-
 25-akpm/kernel/ksyms.c        |    5 -
 25-akpm/kernel/timer.c        |  129 +++++++++++++++++-------------------
 3 files changed, 78 insertions(+), 79 deletions(-)

diff -puN include/linux/timer.h~timer-race-fixes include/linux/timer.h
--- 25/include/linux/timer.h~timer-race-fixes	Fri Aug 1 13:00:49 2003
+++ 25-akpm/include/linux/timer.h	Fri Aug 1 13:00:49 2003
@@ -60,11 +60,30 @@ static inline int timer_pending(const st
 	return timer->base != NULL;
 }
 
-extern void add_timer(struct timer_list * timer);
 extern void add_timer_on(struct timer_list *timer, int cpu);
 extern int del_timer(struct timer_list * timer);
+extern int __mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer(struct timer_list *timer, unsigned long expires);
-
+
+/***
+ * add_timer - start a timer
+ * @timer: the timer to be added
+ *
+ * The kernel will do a ->function(->data) callback from the
+ * timer interrupt at the ->expired point in the future. The
+ * current time is 'jiffies'.
+ *
+ * The timer's ->expired, ->function (and if the handler uses it, ->data)
+ * fields must be set prior calling this function.
+ *
+ * Timers with an ->expired field in the past will be executed in the next
+ * timer tick.
+ */
+static inline void add_timer(struct timer_list * timer)
+{
+	__mod_timer(timer, timer->expires);
+}
+
 #ifdef CONFIG_SMP
 extern int del_timer_sync(struct timer_list * timer);
 #else
diff -puN kernel/timer.c~timer-race-fixes kernel/timer.c
--- 25/kernel/timer.c~timer-race-fixes	Fri Aug 1 13:00:49 2003
+++ 25-akpm/kernel/timer.c	Fri Aug 1 13:00:49 2003
@@ -144,34 +144,62 @@ static void internal_add_timer(tvec_base
 	list_add_tail(&timer->entry, vec);
 }
 
-/***
- * add_timer - start a timer
- * @timer: the timer to be added
- *
- * The kernel will do a ->function(->data) callback from the
- * timer interrupt at the ->expired point in the future. The
- * current time is 'jiffies'.
- *
- * The timer's ->expired, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
- *
- * Timers with an ->expired field in the past will be executed in the next
- * timer tick. It's illegal to add an already pending timer.
- */
-void add_timer(struct timer_list *timer)
+int __mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	tvec_base_t *base = &per_cpu(tvec_bases, get_cpu());
-	unsigned long flags;
-
-	BUG_ON(timer_pending(timer) || !timer->function);
+	tvec_base_t *old_base, *new_base;
+	unsigned long flags;
+	int ret = 0;
+
+	BUG_ON(!timer->function);
 
 	check_timer(timer);
 
-	spin_lock_irqsave(&base->lock, flags);
-	internal_add_timer(base, timer);
-	timer->base = base;
-	spin_unlock_irqrestore(&base->lock, flags);
-	put_cpu();
+	spin_lock_irqsave(&timer->lock, flags);
+	new_base = &per_cpu(tvec_bases, smp_processor_id());
+repeat:
+	old_base = timer->base;
+
+	/*
+	 * Prevent deadlocks via ordering by old_base < new_base.
+	 */
+	if (old_base && (new_base != old_base)) {
+		if (old_base < new_base) {
+			spin_lock(&new_base->lock);
+			spin_lock(&old_base->lock);
+		} else {
+			spin_lock(&old_base->lock);
+			spin_lock(&new_base->lock);
+		}
+		/*
+		 * The timer base might have been cancelled while we were
+		 * trying to take the lock(s):
+		 */
+		if (timer->base != old_base) {
+			spin_unlock(&new_base->lock);
+			spin_unlock(&old_base->lock);
+			goto repeat;
+		}
+	} else
+		spin_lock(&new_base->lock);
+
+	/*
+	 * Delete the previous timeout (if there was any), and install
+	 * the new one:
+	 */
+	if (old_base) {
+		list_del(&timer->entry);
+		ret = 1;
+	}
+	timer->expires = expires;
+	internal_add_timer(new_base, timer);
+	timer->base = new_base;
+
+	if (old_base && (new_base != old_base))
+		spin_unlock(&old_base->lock);
+	spin_unlock(&new_base->lock);
+	spin_unlock_irqrestore(&timer->lock, flags);
+
+	return ret;
 }
 
 /***
@@ -179,7 +207,7 @@ void add_timer(struct timer_list *timer)
  * @timer: the timer to be added
  * @cpu: the CPU to start it on
  *
- * This is not very scalable on SMP.
+ * This is not very scalable on SMP. Double adds are not possible.
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
@@ -217,10 +245,6 @@ void add_timer_on(struct timer_list *tim
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	tvec_base_t *old_base, *new_base;
-	unsigned long flags;
-	int ret = 0;
-
 	BUG_ON(!timer->function);
 
 	check_timer(timer);
@@ -233,52 +257,7 @@ int mod_timer(struct timer_list *timer, 
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	spin_lock_irqsave(&timer->lock, flags);
-	new_base = &per_cpu(tvec_bases, smp_processor_id());
-repeat:
-	old_base = timer->base;
-
-	/*
-	 * Prevent deadlocks via ordering by old_base < new_base.
-	 */
-	if (old_base && (new_base != old_base)) {
-		if (old_base < new_base) {
-			spin_lock(&new_base->lock);
-			spin_lock(&old_base->lock);
-		} else {
-			spin_lock(&old_base->lock);
-			spin_lock(&new_base->lock);
-		}
-		/*
-		 * The timer base might have been cancelled while we were
-		 * trying to take the lock(s):
-		 */
-		if (timer->base != old_base) {
-			spin_unlock(&new_base->lock);
-			spin_unlock(&old_base->lock);
-			goto repeat;
-		}
-	} else
-		spin_lock(&new_base->lock);
-
-	/*
-	 * Delete the previous timeout (if there was any), and install
-	 * the new one:
-	 */
-	if (old_base) {
-		list_del(&timer->entry);
-		ret = 1;
-	}
-	timer->expires = expires;
-	internal_add_timer(new_base, timer);
-	timer->base = new_base;
-
-	if (old_base && (new_base != old_base))
-		spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	spin_unlock_irqrestore(&timer->lock, flags);
-
-	return ret;
+	return __mod_timer(timer, expires);
 }
 
 /***
diff -puN kernel/ksyms.c~timer-race-fixes kernel/ksyms.c
--- 25/kernel/ksyms.c~timer-race-fixes	Fri Aug 1 13:00:49 2003
+++ 25-akpm/kernel/ksyms.c	Fri Aug 1 13:00:49 2003
@@ -404,8 +404,6 @@ EXPORT_SYMBOL(proc_doulongvec_ms_jiffies
 EXPORT_SYMBOL(proc_doulongvec_minmax);
 
 /* interrupt handling */
-EXPORT_SYMBOL(add_timer);
-EXPORT_SYMBOL(del_timer);
 EXPORT_SYMBOL(request_irq);
 EXPORT_SYMBOL(free_irq);
 
@@ -432,7 +430,10 @@ EXPORT_SYMBOL(probe_irq_off);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(del_timer_sync);
 #endif
+EXPORT_SYMBOL(add_timer);
+EXPORT_SYMBOL(del_timer);
 EXPORT_SYMBOL(mod_timer);
+EXPORT_SYMBOL(__mod_timer);
 
 #ifdef HAVE_DISABLE_HLT
 EXPORT_SYMBOL(disable_hlt);
_
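
To make the itimer scenario concrete, here is a minimal sketch of the pattern
the patch is meant to make safe: a timer whose handler re-arms itself, racing
against a del_timer_sync() + add_timer() sequence in process context. This is
my illustration, not part of the patch and not the actual kernel/itimer.c
code; the names (my_timer, my_handler, my_timer_restart) and the one-second
period are made up.

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;	/* illustrative only */

/* Timer handler that re-arms the timer from within the handler itself. */
static void my_handler(unsigned long data)
{
	/* ... periodic work ... */

	my_timer.expires = jiffies + HZ;
	add_timer(&my_timer);	/* now just __mod_timer(&my_timer, my_timer.expires) */
}

static void my_timer_start(void)
{
	init_timer(&my_timer);
	my_timer.function = my_handler;
	my_timer.data = 0;
	my_timer.expires = jiffies + HZ;
	add_timer(&my_timer);
}

/*
 * Process context: restart the interval while my_handler may be running
 * (and re-adding the timer) on another CPU.
 */
static void my_timer_restart(void)
{
	del_timer_sync(&my_timer);

	my_timer.expires = jiffies + HZ;
	/*
	 * With the old add_timer() this add could race with the handler's
	 * own add_timer() and trip the "already pending" BUG_ON(); routed
	 * through __mod_timer() it is serialized on timer->lock and the
	 * per-base locks, and simply replaces any pending timeout.
	 */
	add_timer(&my_timer);
}

Note that add_timer_on() does not get the same treatment; as the updated
comment in the patch states, double adds are still not possible there.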