diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/kernel/apic.c x/arch/i386/kernel/apic.c --- x-ref/arch/i386/kernel/apic.c 2003-06-13 22:07:23.000000000 +0200 +++ x/arch/i386/kernel/apic.c 2003-09-02 02:39:19.000000000 +0200 @@ -1092,6 +1092,9 @@ void smp_apic_timer_interrupt(struct pt_ smp_local_timer_interrupt(regs); irq_exit(cpu, 0); +#if CONFIG_SMP + run_local_timers(); +#endif if (softirq_pending(cpu)) do_softirq(); } diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/i386/mm/fault.c x/arch/i386/mm/fault.c --- x-ref/arch/i386/mm/fault.c 2003-09-02 02:39:18.000000000 +0200 +++ x/arch/i386/mm/fault.c 2003-09-02 02:39:19.000000000 +0200 @@ -95,16 +95,12 @@ out_of_memory: goto bad_area; } -extern spinlock_t timerlist_lock; - /* * Unlock any spinlocks which will prevent us from getting the - * message out (timerlist_lock is acquired through the - * console unblank code) + * message out */ void bust_spinlocks(int yes) { - spin_lock_init(&timerlist_lock); if (yes) { oops_in_progress = 1; #ifdef CONFIG_SMP diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/ia64/kernel/smp.c x/arch/ia64/kernel/smp.c --- x-ref/arch/ia64/kernel/smp.c 2003-06-13 22:07:23.000000000 +0200 +++ x/arch/ia64/kernel/smp.c 2003-09-02 02:39:19.000000000 +0200 @@ -316,6 +316,7 @@ smp_do_timer (struct pt_regs *regs) if (--local_cpu_data->prof_counter <= 0) { local_cpu_data->prof_counter = local_cpu_data->prof_multiplier; update_process_times(user); + run_local_timers(); } } diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/ia64/kernel/traps.c x/arch/ia64/kernel/traps.c --- x-ref/arch/ia64/kernel/traps.c 2003-08-26 00:12:44.000000000 +0200 +++ x/arch/ia64/kernel/traps.c 2003-09-02 02:39:19.000000000 +0200 @@ -41,8 +41,6 @@ register double f30 asm ("f30"); registe #include -extern spinlock_t timerlist_lock; - static fpswa_interface_t *fpswa_interface; void __init @@ -66,7 +64,6 @@ trap_init (void) void bust_spinlocks (int yes) { - spin_lock_init(&timerlist_lock); if (yes) { 
oops_in_progress = 1; #ifdef CONFIG_SMP diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/sparc/kernel/sun4c_irq.c x/arch/sparc/kernel/sun4c_irq.c --- x-ref/arch/sparc/kernel/sun4c_irq.c 2003-06-13 22:07:24.000000000 +0200 +++ x/arch/sparc/kernel/sun4c_irq.c 2003-09-02 02:39:19.000000000 +0200 @@ -155,7 +155,7 @@ static void sun4c_load_profile_irq(int c /* Errm.. not sure how to do this.. */ } -static void __init sun4c_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4c_sparc_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) { int irq; @@ -234,7 +234,7 @@ void __init sun4c_init_IRQ(void) BTFIXUPSET_CALL(clear_profile_irq, sun4c_clear_profile_irq, BTFIXUPCALL_NOP); BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); BTFIXUPSET_CALL(__irq_itoa, sun4m_irq_itoa, BTFIXUPCALL_NORM); - sparc_init_timers = sun4c_init_timers; + sparc_init_timers = sun4c_sparc_init_timers; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); BTFIXUPSET_CALL(clear_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/sparc/kernel/sun4d_irq.c x/arch/sparc/kernel/sun4d_irq.c --- x-ref/arch/sparc/kernel/sun4d_irq.c 2003-06-13 22:07:24.000000000 +0200 +++ x/arch/sparc/kernel/sun4d_irq.c 2003-09-02 02:39:19.000000000 +0200 @@ -444,7 +444,7 @@ static void sun4d_load_profile_irq(int c bw_set_prof_limit(cpu, limit); } -static void __init sun4d_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4d_sparc_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) { int irq; extern struct prom_cpuinfo linux_cpus[NR_CPUS]; @@ -556,7 +556,7 @@ void __init sun4d_init_IRQ(void) BTFIXUPSET_CALL(clear_profile_irq, sun4d_clear_profile_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__irq_itoa, sun4d_irq_itoa, BTFIXUPCALL_NORM); - sparc_init_timers = 
sun4d_init_timers; + sparc_init_timers = sun4d_sparc_init_timers; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4d_set_cpu_int, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_cpu_int, sun4d_clear_ipi, BTFIXUPCALL_NOP); diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/sparc/kernel/sun4m_irq.c x/arch/sparc/kernel/sun4m_irq.c --- x-ref/arch/sparc/kernel/sun4m_irq.c 2003-06-13 22:07:24.000000000 +0200 +++ x/arch/sparc/kernel/sun4m_irq.c 2003-09-02 02:39:19.000000000 +0200 @@ -235,7 +235,7 @@ char *sun4m_irq_itoa(unsigned int irq) return buff; } -static void __init sun4m_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) +static void __init sun4m_sparc_init_timers(void (*counter_fn)(int, void *, struct pt_regs *)) { int reg_count, irq, cpu; struct linux_prom_registers cnt_regs[PROMREG_MAX]; @@ -387,7 +387,7 @@ void __init sun4m_init_IRQ(void) BTFIXUPSET_CALL(clear_profile_irq, sun4m_clear_profile_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__irq_itoa, sun4m_irq_itoa, BTFIXUPCALL_NORM); - sparc_init_timers = sun4m_init_timers; + sparc_init_timers = sun4m_sparc_init_timers; #ifdef CONFIG_SMP BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(clear_cpu_int, sun4m_clear_ipi, BTFIXUPCALL_NORM); diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/kernel/apic.c x/arch/x86_64/kernel/apic.c --- x-ref/arch/x86_64/kernel/apic.c 2003-08-26 00:12:51.000000000 +0200 +++ x/arch/x86_64/kernel/apic.c 2003-09-02 02:39:19.000000000 +0200 @@ -955,6 +955,9 @@ void smp_apic_timer_interrupt(struct pt_ smp_local_timer_interrupt(regs); irq_exit(cpu, 0); +#if CONFIG_SMP + run_local_timers(); +#endif if (softirq_pending(cpu)) do_softirq(); } diff -urNp --exclude CVS --exclude BitKeeper x-ref/arch/x86_64/mm/fault.c x/arch/x86_64/mm/fault.c --- x-ref/arch/x86_64/mm/fault.c 2003-08-26 00:12:51.000000000 +0200 +++ x/arch/x86_64/mm/fault.c 2003-09-02 02:39:19.000000000 
+0200 @@ -34,7 +34,6 @@ extern spinlock_t console_lock, timerlis void bust_spinlocks(int yes) { - spin_lock_init(&timerlist_lock); if (yes) { oops_in_progress = 1; #ifdef CONFIG_SMP diff -urNp --exclude CVS --exclude BitKeeper x-ref/drivers/net/eepro100.c x/drivers/net/eepro100.c --- x-ref/drivers/net/eepro100.c 2003-09-02 02:39:18.000000000 +0200 +++ x/drivers/net/eepro100.c 2003-09-02 02:39:19.000000000 +0200 @@ -1224,9 +1224,6 @@ static void speedo_timer(unsigned long d /* We must continue to monitor the media. */ sp->timer.expires = RUN_AT(2*HZ); /* 2.0 sec. */ add_timer(&sp->timer); -#if defined(timer_exit) - timer_exit(&sp->timer); -#endif } static void speedo_show_state(struct net_device *dev) diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/smp.h x/include/linux/smp.h --- x-ref/include/linux/smp.h 2003-09-02 02:39:18.000000000 +0200 +++ x/include/linux/smp.h 2003-09-02 02:39:19.000000000 +0200 @@ -76,7 +76,8 @@ extern volatile int smp_msg_id; /* * These macros fold the SMP functionality into a single CPU system */ - + +#define NR_CPUS 1 #define smp_num_cpus 1 #define smp_processor_id() 0 #define hard_smp_processor_id() 0 diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/timer.h x/include/linux/timer.h --- x-ref/include/linux/timer.h 2003-07-18 06:07:45.000000000 +0200 +++ x/include/linux/timer.h 2003-09-02 02:39:19.000000000 +0200 @@ -1,9 +1,6 @@ #ifndef _LINUX_TIMER_H #define _LINUX_TIMER_H -#include -#include - /* * In Linux 2.4, static timers have been removed from the kernel. * Timers may be dynamically created and destroyed, and should be initialized @@ -13,22 +10,80 @@ * timeouts. You can use this field to distinguish between the different * invocations. 
*/ + +#include +#include +#include +#include + +/* + * Event timer code + */ +#define TVN_BITS 6 +#define TVR_BITS 8 +#define TVN_SIZE (1 << TVN_BITS) +#define TVR_SIZE (1 << TVR_BITS) +#define TVN_MASK (TVN_SIZE - 1) +#define TVR_MASK (TVR_SIZE - 1) + +typedef struct tvec_s { + int index; + struct list_head vec[TVN_SIZE]; +} tvec_t; + +typedef struct tvec_root_s { + int index; + struct list_head vec[TVR_SIZE]; +} tvec_root_t; + +#define NOOF_TVECS 5 + +typedef struct timer_list timer_t; + +typedef struct tvec_t_base_s { + spinlock_t lock; + unsigned long timer_jiffies; + volatile timer_t * volatile running_timer; + struct list_head * run_timer_list_running; + tvec_root_t tv1; + tvec_t tv2; + tvec_t tv3; + tvec_t tv4; + tvec_t tv5; +} tvec_base_t; + +/* + * This is the new and improved way of handling timers. + * + * The "data" field is in case you want to use the same + * timeout function for several timeouts. You can use this + * to distinguish between the different invocations. + */ + struct timer_list { struct list_head list; unsigned long expires; unsigned long data; void (*function)(unsigned long); + tvec_base_t *base; + unsigned long lock; }; -extern void add_timer(struct timer_list * timer); -extern int del_timer(struct timer_list * timer); +extern void add_timer(timer_t * timer); +extern int del_timer(timer_t * timer); #ifdef CONFIG_SMP -extern int del_timer_sync(struct timer_list * timer); +extern int del_timer_sync(timer_t * timer); extern void sync_timers(void); +#define timer_enter(base, t) do { base->running_timer = t; mb(); } while (0) +#define timer_exit(base) do { base->running_timer = NULL; } while (0) +#define timer_is_running(base,t) (base->running_timer == t) +#define timer_synchronize(base,t) while (timer_is_running(base,t)) barrier() #else #define del_timer_sync(t) del_timer(t) #define sync_timers() do { } while (0) +#define timer_enter(base,t) do { } while (0) +#define timer_exit(base) do { } while (0) #endif /* @@ -38,17 +93,34 @@ extern 
void sync_timers(void); * If the timer is known to be not pending (ie, in the handler), mod_timer * is less efficient than a->expires = b; add_timer(a). */ -int mod_timer(struct timer_list *timer, unsigned long expires); +int mod_timer(timer_t *timer, unsigned long expires); extern void it_real_fn(unsigned long); -static inline void init_timer(struct timer_list * timer) +extern void init_timers(void); +extern void run_local_timers(void); + +extern tvec_base_t tvec_bases[NR_CPUS]; + +static inline void init_timer(timer_t * timer) { timer->list.next = timer->list.prev = NULL; + timer->base = NULL; + timer->lock = 0; } -static inline int timer_pending (const struct timer_list * timer) +#define TIMER_DEBUG 0 +#if TIMER_DEBUG +# define CHECK_BASE(base) \ + if (base && ((base < tvec_bases) || (base >= tvec_bases + NR_CPUS))) \ + BUG() +#else +# define CHECK_BASE(base) +#endif + +static inline int timer_pending(const timer_t * timer) { + CHECK_BASE(timer->base); return timer->list.next != NULL; } diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/ksyms.c x/kernel/ksyms.c --- x-ref/kernel/ksyms.c 2003-09-02 02:39:18.000000000 +0200 +++ x/kernel/ksyms.c 2003-09-02 02:39:19.000000000 +0200 @@ -403,6 +403,7 @@ EXPORT_SYMBOL(probe_irq_off); EXPORT_SYMBOL(del_timer_sync); #endif EXPORT_SYMBOL(mod_timer); +EXPORT_SYMBOL(tvec_bases); EXPORT_SYMBOL(tq_timer); EXPORT_SYMBOL(tq_immediate); diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sched.c x/kernel/sched.c --- x-ref/kernel/sched.c 2003-09-02 02:39:18.000000000 +0200 +++ x/kernel/sched.c 2003-09-02 02:39:19.000000000 +0200 @@ -1595,7 +1595,6 @@ void __init init_idle(task_t *idle, int __restore_flags(flags); } -extern void init_timervecs(void); extern void timer_bh(void); extern void tqueue_bh(void); extern void immediate_bh(void); @@ -1634,8 +1633,7 @@ void __init sched_init(void) current->cpu = smp_processor_id(); wake_up_process(current); - init_timervecs(); - init_bh(TIMER_BH, timer_bh); + init_timers(); 
init_bh(TQUEUE_BH, tqueue_bh); init_bh(IMMEDIATE_BH, immediate_bh); diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/timer.c x/kernel/timer.c --- x-ref/kernel/timer.c 2003-09-02 02:39:18.000000000 +0200 +++ x/kernel/timer.c 2003-09-02 02:40:23.000000000 +0200 @@ -13,10 +13,15 @@ * serialize accesses to xtime/lost_ticks). * Copyright (C) 1998 Andrea Arcangeli * 1999-03-10 Improved NTP compatibility by Ulrich Windl + * 2000-10-05 Implemented scalable SMP per-CPU timer handling. + * Copyright (C) 2000 Ingo Molnar + * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar */ #include + #include +#include #include #include #include @@ -73,87 +78,51 @@ unsigned int * prof_buffer; unsigned long prof_len; unsigned long prof_shift; -/* - * Event timer code - */ -#define TVN_BITS 6 -#define TVR_BITS 8 -#define TVN_SIZE (1 << TVN_BITS) -#define TVR_SIZE (1 << TVR_BITS) -#define TVN_MASK (TVN_SIZE - 1) -#define TVR_MASK (TVR_SIZE - 1) - -struct timer_vec { - int index; - struct list_head vec[TVN_SIZE]; -}; - -struct timer_vec_root { - int index; - struct list_head vec[TVR_SIZE]; -}; - -static struct timer_vec tv5; -static struct timer_vec tv4; -static struct timer_vec tv3; -static struct timer_vec tv2; -static struct timer_vec_root tv1; - -static struct timer_vec * const tvecs[] = { - (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 -}; - -static struct list_head * run_timer_list_running; - -#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) - -void init_timervecs (void) -{ - int i; +tvec_base_t tvec_bases[NR_CPUS]; - for (i = 0; i < TVN_SIZE; i++) { - INIT_LIST_HEAD(tv5.vec + i); - INIT_LIST_HEAD(tv4.vec + i); - INIT_LIST_HEAD(tv3.vec + i); - INIT_LIST_HEAD(tv2.vec + i); - } - for (i = 0; i < TVR_SIZE; i++) - INIT_LIST_HEAD(tv1.vec + i); -} +/* jiffies at the most recent update of wall time */ +unsigned long wall_jiffies; -static unsigned long timer_jiffies; +/* + * This spinlock protect us from races in SMP while playing with xtime. 
-arca + */ +rwlock_t xtime_lock = RW_LOCK_UNLOCKED; -static inline void internal_add_timer(struct timer_list *timer) +/* + * This is the 'global' timer BH (run_all_timers()). It is only called if + * one of the local timer interrupts couldn't run timers. + */ +static inline void internal_add_timer(tvec_base_t *base, timer_t *timer) { /* * must be cli-ed when calling this */ unsigned long expires = timer->expires; - unsigned long idx = expires - timer_jiffies; + unsigned long idx = expires - base->timer_jiffies; struct list_head * vec; - if (run_timer_list_running) - vec = run_timer_list_running; + if (base->run_timer_list_running) + vec = base->run_timer_list_running; else if (idx < TVR_SIZE) { int i = expires & TVR_MASK; - vec = tv1.vec + i; + vec = base->tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; - vec = tv2.vec + i; + vec = base->tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - vec = tv3.vec + i; + vec = base->tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - vec = tv4.vec + i; + vec = base->tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - vec = tv1.vec + tv1.index; + vec = base->tv1.vec + base->tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = tv5.vec + i; + vec = base->tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ INIT_LIST_HEAD(&timer->list); @@ -165,37 +134,33 @@ static inline void internal_add_timer(st list_add(&timer->list, vec->prev); } -/* Initialize both explicitly - let's try to have them in the same cache line */ -spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; - -#ifdef CONFIG_SMP -volatile struct timer_list * volatile running_timer; -#define timer_enter(t) do { 
running_timer = t; mb(); } while (0) -#define timer_exit() do { running_timer = NULL; } while (0) -#define timer_is_running(t) (running_timer == t) -#define timer_synchronize(t) while (timer_is_running(t)) cpu_relax() -#else -#define timer_enter(t) do { } while (0) -#define timer_exit() do { } while (0) -#endif - -void add_timer(struct timer_list *timer) +void add_timer(timer_t *timer) { + tvec_base_t * base = tvec_bases + smp_processor_id(); unsigned long flags; - spin_lock_irqsave(&timerlist_lock, flags); - if (timer_pending(timer)) - goto bug; - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return; -bug: - spin_unlock_irqrestore(&timerlist_lock, flags); - printk("bug: kernel timer added twice at %p.\n", - __builtin_return_address(0)); + CHECK_BASE(base); + CHECK_BASE(timer->base); + + local_irq_save(flags); + while (unlikely(test_and_set_bit(0, &timer->lock))) + while (test_bit(0, &timer->lock)) + cpu_relax(); + + spin_lock(&base->lock); + if (timer_pending(timer)) { + printk("bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); + } else { + internal_add_timer(base, timer); + timer->base = base; + } + spin_unlock(&base->lock); + clear_bit(0, &timer->lock); + local_irq_restore(flags); } -static inline int detach_timer (struct timer_list *timer) +static inline int detach_timer(timer_t *timer) { if (!timer_pending(timer)) return 0; @@ -203,28 +168,106 @@ static inline int detach_timer (struct t return 1; } -int mod_timer(struct timer_list *timer, unsigned long expires) +/* + * mod_timer() has subtle locking semantics because parallel + * calls to it must happen serialized. 
+ */ +int mod_timer(timer_t *timer, unsigned long expires) { - int ret; + tvec_base_t *old_base, *new_base; unsigned long flags; + int ret; + + /* + * This is a common optimization triggered by the + * networking code - if the timer is re-modified + * to be the same thing then just return: + */ + if (timer->expires == expires && timer_pending(timer)) + return 1; + + new_base = tvec_bases + smp_processor_id(); + CHECK_BASE(new_base); + + BUG_ON(timer->lock & ~1UL); + + local_irq_save(flags); + while (unlikely(test_and_set_bit(0, &timer->lock))) + while (test_bit(0, &timer->lock)) + cpu_relax(); + +repeat: + old_base = timer->base; + CHECK_BASE(old_base); + + /* + * Prevent deadlocks via ordering by old_base < new_base. + */ + if (old_base && (new_base != old_base)) { + if (old_base < new_base) { + spin_lock(&new_base->lock); + spin_lock(&old_base->lock); + } else { + spin_lock(&old_base->lock); + spin_lock(&new_base->lock); + } + /* + * Subtle, we rely on timer->base being always + * valid and being updated atomically. 
+ */ + if (timer->base != old_base) { + spin_unlock(&new_base->lock); + spin_unlock(&old_base->lock); + goto repeat; + } + } else + spin_lock(&new_base->lock); - spin_lock_irqsave(&timerlist_lock, flags); timer->expires = expires; ret = detach_timer(timer); - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); + internal_add_timer(new_base, timer); + timer->base = new_base; + + + if (old_base && (new_base != old_base)) + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); + + clear_bit(0, &timer->lock); + local_irq_restore(flags); + return ret; } -int del_timer(struct timer_list * timer) +int del_timer(timer_t * timer) { - int ret; unsigned long flags; + tvec_base_t * base; + int ret; + + CHECK_BASE(timer->base); + if (!timer->base) + return 0; + + local_irq_save(flags); + while (unlikely(test_and_set_bit(0, &timer->lock))) + while (test_bit(0, &timer->lock)) + cpu_relax(); - spin_lock_irqsave(&timerlist_lock, flags); +repeat: + base = timer->base; + spin_lock(&base->lock); + if (base != timer->base) { + spin_unlock(&base->lock); + goto repeat; + } ret = detach_timer(timer); timer->list.next = timer->list.prev = NULL; - spin_unlock_irqrestore(&timerlist_lock, flags); + spin_unlock(&base->lock); + + clear_bit(0, &timer->lock); + local_irq_restore(flags); + return ret; } @@ -242,32 +285,58 @@ void sync_timers(void) * (for reference counting). 
*/ -int del_timer_sync(struct timer_list * timer) +int del_timer_sync(timer_t * timer) { + unsigned long flags; + tvec_base_t * base; int ret = 0; + CHECK_BASE(timer->base); + if (!timer->base) + return 0; + + local_irq_save(flags); + while (unlikely(test_and_set_bit(0, &timer->lock))) + while (test_bit(0, &timer->lock)) + cpu_relax(); + for (;;) { - unsigned long flags; int running; - - spin_lock_irqsave(&timerlist_lock, flags); +repeat: + base = timer->base; + spin_lock(&base->lock); + if (base != timer->base) { + spin_unlock(&base->lock); + goto repeat; + } ret += detach_timer(timer); timer->list.next = timer->list.prev = 0; - running = timer_is_running(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); + running = timer_is_running(base, timer); + spin_unlock(&base->lock); if (!running) break; - timer_synchronize(timer); + clear_bit(0, &timer->lock); + local_irq_restore(flags); + + timer_synchronize(base, timer); + + local_irq_save(flags); + while (unlikely(test_and_set_bit(0, &timer->lock))) + while (test_bit(0, &timer->lock)) + cpu_relax(); } + clear_bit(0, &timer->lock); + local_irq_restore(flags); + return ret; } #endif -static inline void cascade_timers(struct timer_vec *tv) +static void cascade(tvec_base_t *base, tvec_t *tv) { /* cascade all the timers from tv up one level */ struct list_head *head, *curr, *next; @@ -279,66 +348,80 @@ static inline void cascade_timers(struct * detach them individually, just clear the list afterwards. 
*/ while (curr != head) { - struct timer_list *tmp; + timer_t *tmp; - tmp = list_entry(curr, struct timer_list, list); + tmp = list_entry(curr, timer_t, list); + CHECK_BASE(tmp->base); + if (tmp->base != base) + BUG(); next = curr->next; list_del(curr); // not needed - internal_add_timer(tmp); + internal_add_timer(base, tmp); curr = next; } INIT_LIST_HEAD(head); tv->index = (tv->index + 1) & TVN_MASK; } -static inline void run_timer_list(void) +static void __run_timers(tvec_base_t *base) { - spin_lock_irq(&timerlist_lock); - while ((long)(jiffies - timer_jiffies) >= 0) { - LIST_HEAD(queued); + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + while ((long)(jiffies - base->timer_jiffies) >= 0) { struct list_head *head, *curr; - if (!tv1.index) { - int n = 1; - do { - cascade_timers(tvecs[n]); - } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); + LIST_HEAD(queued); + + /* + * Cascade timers: + */ + if (!base->tv1.index) { + cascade(base, &base->tv2); + if (base->tv2.index == 1) { + cascade(base, &base->tv3); + if (base->tv3.index == 1) { + cascade(base, &base->tv4); + if (base->tv4.index == 1) + cascade(base, &base->tv5); + } + } } - run_timer_list_running = &queued; + base->run_timer_list_running = &queued; repeat: - head = tv1.vec + tv1.index; + head = base->tv1.vec + base->tv1.index; curr = head->next; if (curr != head) { - struct timer_list *timer; void (*fn)(unsigned long); unsigned long data; + timer_t *timer; - timer = list_entry(curr, struct timer_list, list); + timer = list_entry(curr, timer_t, list); fn = timer->function; - data= timer->data; + data = timer->data; detach_timer(timer); timer->list.next = timer->list.prev = NULL; - timer_enter(timer); - spin_unlock_irq(&timerlist_lock); + timer_enter(base, timer); + spin_unlock_irq(&base->lock); fn(data); - spin_lock_irq(&timerlist_lock); - timer_exit(); + spin_lock_irq(&base->lock); + timer_exit(base); goto repeat; } - run_timer_list_running = NULL; - ++timer_jiffies; - tv1.index = 
(tv1.index + 1) & TVR_MASK; + base->run_timer_list_running = NULL; + ++base->timer_jiffies; + base->tv1.index = (base->tv1.index + 1) & TVR_MASK; curr = queued.next; while (curr != &queued) { - struct timer_list *timer; + timer_t *timer; timer = list_entry(curr, struct timer_list, list); curr = curr->next; - internal_add_timer(timer); + internal_add_timer(base, timer); } } - spin_unlock_irq(&timerlist_lock); + spin_unlock_irqrestore(&base->lock, flags); } spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; @@ -634,42 +717,77 @@ static inline void calc_load(unsigned lo } } -/* jiffies at the most recent update of wall time */ -unsigned long wall_jiffies; +static void run_all_timers(void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) { + tvec_base_t *base = tvec_bases + cpu_logical_map(i); + if ((long)(jiffies - base->timer_jiffies) >= 0) + __run_timers(base); + } +} /* - * This spinlock protect us from races in SMP while playing with xtime. -arca + * Called by the local, per-CPU timer interrupt on SMP. + * + * This function has to do all sorts of locking to make legacy + * cli()-users and BH-disablers work. If locking doesn't succeed + * now then we fall back to TIMER_BH. */ -rwlock_t xtime_lock = RW_LOCK_UNLOCKED; -static inline void update_times(void) +void run_local_timers(void) { - unsigned long ticks; + int cpu = smp_processor_id(); + tvec_base_t *base = tvec_bases + cpu; - /* - * update_times() is run from the raw timer_bh handler so we - * just know that the irqs are locally enabled and so we don't - * need to save/restore the flags of the local CPU here. 
-arca - */ - write_lock_irq(&xtime_lock); - vxtime_lock(); + if (in_interrupt()) + goto out_mark; + + local_bh_disable(); + local_irq_disable(); + if (!spin_trylock(&global_bh_lock)) + goto out_enable_mark; + + if (!hardirq_trylock(cpu)) + goto out_unlock_enable_mark; + + if ((long)(jiffies - base->timer_jiffies) >= 0) + __run_timers(base); + + hardirq_endlock(cpu); + spin_unlock(&global_bh_lock); + local_irq_enable(); + local_bh_enable(); + return; + +out_unlock_enable_mark: + spin_unlock(&global_bh_lock); + +out_enable_mark: + local_irq_enable(); + local_bh_enable(); + +out_mark: + mark_bh(TIMER_BH); +} + +/* + * Called by the timer interrupt. xtime_lock must already be taken + * by the timer IRQ! + */ +static void update_times(void) +{ + unsigned long ticks; ticks = jiffies - wall_jiffies; if (ticks) { wall_jiffies += ticks; update_wall_time(ticks); } - vxtime_unlock(); - write_unlock_irq(&xtime_lock); calc_load(ticks); } -void timer_bh(void) -{ - update_times(); - run_timer_list(); -} - void do_timer(struct pt_regs *regs) { (*(unsigned long *)&jiffies)++; @@ -677,8 +795,18 @@ void do_timer(struct pt_regs *regs) /* SMP process accounting uses the local APIC timer */ update_process_times(user_mode(regs)); +#if defined(CONFIG_X86) || defined(CONFIG_IA64) /* x86-64 is also included by CONFIG_X86 */ + mark_bh(TIMER_BH); +#endif #endif + /* + * Right now only x86 (incl. x86-64) and ia64 SMP call + * run_local_timers() from a per-CPU interrupt. 
+ */ +#if !defined(CONFIG_X86) && !defined(CONFIG_IA64) /* x86-64 is also included by CONFIG_X86 */ mark_bh(TIMER_BH); +#endif + update_times(); if (TQ_ACTIVE(tq_timer)) mark_bh(TQUEUE_BH); } @@ -940,3 +1068,23 @@ asmlinkage long sys_nanosleep(struct tim } return 0; } + +void __init init_timers(void) +{ + int i, j; + + for (i = 0; i < NR_CPUS; i++) { + tvec_base_t *base = tvec_bases + i; + + spin_lock_init(&base->lock); + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + } + init_bh(TIMER_BH, run_all_timers); +} diff -urNp --exclude CVS --exclude BitKeeper x-ref/lib/bust_spinlocks.c x/lib/bust_spinlocks.c --- x-ref/lib/bust_spinlocks.c 2003-03-15 03:25:18.000000000 +0100 +++ x/lib/bust_spinlocks.c 2003-09-02 02:39:19.000000000 +0200 @@ -14,11 +14,8 @@ #include #include -extern spinlock_t timerlist_lock; - void bust_spinlocks(int yes) { - spin_lock_init(&timerlist_lock); if (yes) { oops_in_progress = 1; } else {