From: Oleg Nesterov New rules: ->_base & 1 : is timer pending ->_base & ~1 : where the timer was last scheduled ->_base == NULL means that this timer is not running on any cpu. del_timer_sync() clears ->_base; this is merely an optimization, so that subsequent del_timer calls do not need locking. del_timer_sync() assumes that the timer's base == (->_base & ~1) can't be changed during execution of timer->function. This is achieved by the next patch. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton --- 25-akpm/include/linux/timer.h | 19 ++++++++++- 25-akpm/kernel/timer.c | 69 +++++++++++++++++++++++++----------------- 2 files changed, 60 insertions(+), 28 deletions(-) diff -puN include/linux/timer.h~timers-rework-del_timer_sync include/linux/timer.h --- 25/include/linux/timer.h~timers-rework-del_timer_sync 2005-03-19 14:19:51.000000000 -0800 +++ 25-akpm/include/linux/timer.h 2005-03-19 14:19:51.000000000 -0800 @@ -21,9 +21,26 @@ struct timer_list { struct tvec_t_base_s *_base; }; +/* + * To save space, we play games with the least significant bit + * of timer_list->_base. + * + * If (_base & __TIMER_PENDING), the timer is pending. Implies + * (_base & ~__TIMER_PENDING) != NULL. + * + * (_base & ~__TIMER_PENDING), if != NULL, is the address of the + * timer's per-cpu tvec_t_base_s where it was last scheduled and + * where it may still be running. + * + * If (_base == NULL), the timer was not scheduled yet or it was + * cancelled by del_timer_sync(), so it is not running on any CPU.
+ */ + +#define __TIMER_PENDING 1 + static inline int __timer_pending(struct tvec_t_base_s *base) { - return base != NULL; + return ((unsigned long)base & __TIMER_PENDING) != 0; } #define TIMER_MAGIC 0x4b87ad6e diff -puN kernel/timer.c~timers-rework-del_timer_sync kernel/timer.c --- 25/kernel/timer.c~timers-rework-del_timer_sync 2005-03-19 14:19:51.000000000 -0800 +++ 25-akpm/kernel/timer.c 2005-03-19 14:19:51.000000000 -0800 @@ -88,16 +88,26 @@ static inline void set_running_timer(tve static inline tvec_base_t *__get_base(struct timer_list *timer) { - return timer->_base; + tvec_base_t *base = timer->_base; + + if (__timer_pending(base)) + return (void*)base - __TIMER_PENDING; + else + return NULL; } static inline void __set_base(struct timer_list *timer, tvec_base_t *base, int pending) { if (pending) - timer->_base = base; + timer->_base = (void*)base + __TIMER_PENDING; else - timer->_base = NULL; + timer->_base = base; +} + +static inline tvec_base_t *timer_base(struct timer_list *timer) +{ + return (tvec_base_t*)((unsigned long)timer->_base & ~__TIMER_PENDING); } /* Fake initialization */ @@ -346,41 +356,46 @@ EXPORT_SYMBOL(del_timer); * Synchronization rules: callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts. The caller must not hold locks which would prevent - completion of the timer's handler. Upon exit the timer is not queued and - the handler is not running on any CPU. + completion of the timer's handler. The timer's handler must not call + add_timer_on(). Upon exit the timer is not queued and the handler is + not running on any CPU. * * The function returns whether it has deactivated a pending timer or not. - * - * del_timer_sync() is slow and complicated because it copes with timer - * handlers which re-arm the timer (periodic timers). If the timer handler - * is known to not do this (a single shot timer) then use - * del_singleshot_timer_sync() instead.
*/ int del_timer_sync(struct timer_list *timer) { - tvec_base_t *base; - int i, ret = 0; + int ret; check_timer(timer); -del_again: - ret += del_timer(timer); + ret = 0; + for (;;) { + unsigned long flags; + tvec_base_t *base; - for_each_online_cpu(i) { - base = &per_cpu(tvec_bases, i); - if (base->running_timer == timer) { - while (base->running_timer == timer) { - cpu_relax(); - preempt_check_resched(); - } - break; + base = timer_base(timer); + if (!base) + return ret; + + spin_lock_irqsave(&base->lock, flags); + + if (base->running_timer == timer) + goto unlock; + + if (timer_base(timer) != base) + goto unlock; + + if (timer_pending(timer)) { + list_del(&timer->entry); + ret = 1; } + /* Need to make sure that anybody who sees a NULL base + * also sees the list ops */ + smp_wmb(); + timer->_base = NULL; +unlock: + spin_unlock_irqrestore(&base->lock, flags); } - smp_rmb(); - if (timer_pending(timer)) - goto del_again; - - return ret; } EXPORT_SYMBOL(del_timer_sync); _