diff -urN 2.4.6pre5/arch/i386/kernel/entry.S softirq/arch/i386/kernel/entry.S
--- 2.4.6pre5/arch/i386/kernel/entry.S	Thu Jun 21 08:03:30 2001
+++ softirq/arch/i386/kernel/entry.S	Thu Jun 21 15:58:06 2001
@@ -204,8 +204,18 @@
 	movl %eax,EAX(%esp)		# save the return value
 ENTRY(ret_from_sys_call)
 	cli				# need_resched and signals atomic test
+#ifdef CONFIG_SMP
+	movl processor(%ebx),%eax
+	shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
+	testl $0, SYMBOL_NAME(irq_stat)(,%eax)	# softirq_pending
+#else
+	testl $0, SYMBOL_NAME(irq_stat)		# softirq_pending
+#endif
+	jne handle_softirq
+handle_softirq_back:
 	cmpl $0,need_resched(%ebx)
 	jne reschedule
+reschedule_back:
 	cmpl $0,sigpending(%ebx)
 	jne signal_return
 restore_all:
@@ -256,9 +266,14 @@
 	jmp restore_all
 
 	ALIGN
+handle_softirq:
+	call SYMBOL_NAME(do_softirq)
+	jmp handle_softirq_back
+
+	ALIGN
 reschedule:
 	call SYMBOL_NAME(schedule)    # test
-	jmp ret_from_sys_call
+	jmp reschedule_back
 
 ENTRY(divide_error)
 	pushl $0		# no error code
diff -urN 2.4.6pre5/include/asm-alpha/hardirq.h softirq/include/asm-alpha/hardirq.h
--- 2.4.6pre5/include/asm-alpha/hardirq.h	Thu Jun 21 08:03:51 2001
+++ softirq/include/asm-alpha/hardirq.h	Thu Jun 21 15:58:06 2001
@@ -10,6 +10,7 @@
 	unsigned int __local_irq_count;
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
+	struct task_struct * __ksoftirqd_task;
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
diff -urN 2.4.6pre5/include/asm-alpha/softirq.h softirq/include/asm-alpha/softirq.h
--- 2.4.6pre5/include/asm-alpha/softirq.h	Thu Jun 21 08:03:51 2001
+++ softirq/include/asm-alpha/softirq.h	Thu Jun 21 15:58:06 2001
@@ -8,21 +8,28 @@
 extern inline void cpu_bh_disable(int cpu)
 {
 	local_bh_count(cpu)++;
-	mb();
+	barrier();
 }
 
-extern inline void cpu_bh_enable(int cpu)
+extern inline void __cpu_bh_enable(int cpu)
 {
-	mb();
+	barrier();
 	local_bh_count(cpu)--;
 }
 
-#define local_bh_enable()	cpu_bh_enable(smp_processor_id())
-#define __local_bh_enable	local_bh_enable
+#define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
 #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
 
-#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+#define local_bh_enable()					\
+do {								\
+	int cpu;						\
+								\
+	barrier();						\
+	cpu = smp_processor_id();				\
+	if (!--local_bh_count(cpu) && softirq_pending(cpu))	\
+		do_softirq();					\
+} while (0)
 
-#define __cpu_raise_softirq(cpu,nr) set_bit((nr), &softirq_pending(cpu))
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
 
 #endif /* _ALPHA_SOFTIRQ_H */
diff -urN 2.4.6pre5/include/asm-i386/hardirq.h softirq/include/asm-i386/hardirq.h
--- 2.4.6pre5/include/asm-i386/hardirq.h	Thu Jun 21 08:03:51 2001
+++ softirq/include/asm-i386/hardirq.h	Thu Jun 21 15:58:06 2001
@@ -11,6 +11,7 @@
 	unsigned int __local_irq_count;
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
+	struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
 	unsigned int __nmi_count;	/* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;
 
diff -urN 2.4.6pre5/include/asm-i386/softirq.h softirq/include/asm-i386/softirq.h
--- 2.4.6pre5/include/asm-i386/softirq.h	Thu Jun 21 08:03:52 2001
+++ softirq/include/asm-i386/softirq.h	Thu Jun 21 15:58:06 2001
@@ -11,8 +11,6 @@
 #define local_bh_disable()	cpu_bh_disable(smp_processor_id())
 #define __local_bh_enable()	__cpu_bh_enable(smp_processor_id())
 
-#define __cpu_raise_softirq(cpu,nr) set_bit((nr), &softirq_pending(cpu));
-#define raise_softirq(nr) __cpu_raise_softirq(smp_processor_id(), (nr))
 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
 
@@ -28,6 +26,7 @@
 do {								\
 	unsigned int *ptr = &local_bh_count(smp_processor_id());	\
 								\
+	barrier();						\
 	if (!--*ptr)						\
 		__asm__ __volatile__ (				\
 			"cmpl $0, -8(%0);"			\
diff -urN 2.4.6pre5/include/asm-sparc/hardirq.h softirq/include/asm-sparc/hardirq.h
--- 2.4.6pre5/include/asm-sparc/hardirq.h	Thu Jun 21 08:03:53 2001
+++ softirq/include/asm-sparc/hardirq.h	Thu Jun 21 15:58:06 2001
@@ -23,6 +23,7 @@
 #endif
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
+	struct task_struct * __ksoftirqd_task;
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
diff -urN 2.4.6pre5/include/asm-sparc64/hardirq.h softirq/include/asm-sparc64/hardirq.h
--- 2.4.6pre5/include/asm-sparc64/hardirq.h	Thu Jun 21 08:03:54 2001
+++ softirq/include/asm-sparc64/hardirq.h	Thu Jun 21 15:58:06 2001
@@ -22,6 +22,7 @@
 #endif
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
+	struct task_struct * __ksoftirqd_task;
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
diff -urN 2.4.6pre5/include/linux/interrupt.h softirq/include/linux/interrupt.h
--- 2.4.6pre5/include/linux/interrupt.h	Thu Jun 21 08:03:56 2001
+++ softirq/include/linux/interrupt.h	Thu Jun 21 15:58:06 2001
@@ -74,6 +74,22 @@
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 
+static inline void __cpu_raise_softirq(int cpu, int nr)
+{
+	softirq_pending(cpu) |= (1<<nr);
+}
 
 #define tasklet_trylock(t) (!test_and_set_bit(TASKLET_STATE_RUN, &(t)->state))
-#define tasklet_unlock(t) clear_bit(TASKLET_STATE_RUN, &(t)->state)
+#define tasklet_unlock(t) do { smp_mb__before_clear_bit(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } while(0)
 #define tasklet_unlock_wait(t) while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
 
 extern void tasklet_schedule(struct tasklet_struct *t);
diff -urN 2.4.6pre5/include/linux/irq_cpustat.h softirq/include/linux/irq_cpustat.h
--- 2.4.6pre5/include/linux/irq_cpustat.h	Thu Jun 21 08:03:56 2001
+++ softirq/include/linux/irq_cpustat.h	Thu Jun 21 15:58:06 2001
@@ -30,6 +30,7 @@
 #define local_irq_count(cpu)	__IRQ_STAT((cpu), __local_irq_count)
 #define local_bh_count(cpu)	__IRQ_STAT((cpu), __local_bh_count)
 #define syscall_count(cpu)	__IRQ_STAT((cpu), __syscall_count)
+#define ksoftirqd_task(cpu)	__IRQ_STAT((cpu), __ksoftirqd_task)
 /* arch dependent irq_stat fields */
 #define nmi_count(cpu)		__IRQ_STAT((cpu), __nmi_count)	/* i386, ia64 */
 
diff -urN 2.4.6pre5/kernel/softirq.c softirq/kernel/softirq.c
--- 2.4.6pre5/kernel/softirq.c	Thu Jun 21 08:03:57 2001
+++ softirq/kernel/softirq.c	Thu Jun 21 15:58:06 2001
@@ -51,17 +51,20 @@
 {
 	int cpu = smp_processor_id();
 	__u32 pending;
+	long flags;
+	__u32 mask;
 
 	if (in_interrupt())
 		return;
 
-	local_irq_disable();
+	local_irq_save(flags);
 
 	pending = softirq_pending(cpu);
 
 	if (pending) {
 		struct softirq_action *h;
 
+		mask = ~pending;
 		local_bh_disable();
 restart:
 		/* Reset the pending bitmask before enabling irqs */
@@ -81,12 +84,26 @@
 		local_irq_disable();
 
 		pending = softirq_pending(cpu);
-		if (pending)
+		if (pending & mask) {
+			mask &= ~pending;
 			goto restart;
+		}
 		__local_bh_enable();
+
+		if (pending) {
+			/*
+			 * We cannot loop indefinitely here to avoid userspace starvation,
+			 * but we also don't want to introduce a worst case 1/HZ latency
+			 * to the pending events, so let the scheduler balance
+			 * the softirq load for us.
+			 */
+			struct task_struct * tsk = ksoftirqd_task(cpu);
+			if (tsk && tsk->state != TASK_RUNNING)
+				wake_up_process(tsk);
+		}
 	}
 
-	local_irq_enable();
+	local_irq_restore(flags);
 }
@@ -112,8 +129,7 @@
 	 * If nobody is running it then add it to this CPU's
 	 * tasklet queue.
 	 */
-	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state) &&
-						tasklet_trylock(t)) {
+	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		t->next = tasklet_vec[cpu].list;
 		tasklet_vec[cpu].list = t;
 		__cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
@@ -130,8 +146,7 @@
 	cpu = smp_processor_id();
 	local_irq_save(flags);
 
-	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state) &&
-						tasklet_trylock(t)) {
+	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		t->next = tasklet_hi_vec[cpu].list;
 		tasklet_hi_vec[cpu].list = t;
 		__cpu_raise_softirq(cpu, HI_SOFTIRQ);
@@ -148,37 +163,29 @@
 	local_irq_disable();
 	list = tasklet_vec[cpu].list;
 	tasklet_vec[cpu].list = NULL;
+	local_irq_enable();
 
 	while (list) {
 		struct tasklet_struct *t = list;
 
 		list = list->next;
 
-		/*
-		 * A tasklet is only added to the queue while it's
-		 * locked, so no other CPU can have this tasklet
-		 * pending:
-		 */
 		if (!tasklet_trylock(t))
 			BUG();
-repeat:
-		if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
-			BUG();
 
 		if (!atomic_read(&t->count)) {
-			local_irq_enable();
+			clear_bit(TASKLET_STATE_SCHED, &t->state);
 			t->func(t->data);
-			local_irq_disable();
-			/*
-			 * One more run if the tasklet got reactivated:
-			 */
-			if (test_bit(TASKLET_STATE_SCHED, &t->state))
-				goto repeat;
+			tasklet_unlock(t);
+			continue;
 		}
 
 		tasklet_unlock(t);
-		if (test_bit(TASKLET_STATE_SCHED, &t->state))
-			tasklet_schedule(t);
+
+		local_irq_disable();
+		t->next = tasklet_vec[cpu].list;
+		tasklet_vec[cpu].list = t;
+		__cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+		local_irq_enable();
 	}
-	local_irq_enable();
 }
@@ -193,6 +200,7 @@
 	local_irq_disable();
 	list = tasklet_hi_vec[cpu].list;
 	tasklet_hi_vec[cpu].list = NULL;
+	local_irq_enable();
 
 	while (list) {
 		struct tasklet_struct *t = list;
@@ -201,21 +209,20 @@
 		if (!tasklet_trylock(t))
 			BUG();
-repeat:
-		if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
-			BUG();
 
 		if (!atomic_read(&t->count)) {
-			local_irq_enable();
+			clear_bit(TASKLET_STATE_SCHED, &t->state);
 			t->func(t->data);
-			local_irq_disable();
-			if (test_bit(TASKLET_STATE_SCHED, &t->state))
-				goto repeat;
+			tasklet_unlock(t);
+			continue;
 		}
 
 		tasklet_unlock(t);
-		if (test_bit(TASKLET_STATE_SCHED, &t->state))
-			tasklet_hi_schedule(t);
+
+		local_irq_disable();
+		t->next = tasklet_hi_vec[cpu].list;
+		tasklet_hi_vec[cpu].list = t;
+		__cpu_raise_softirq(cpu, HI_SOFTIRQ);
+		local_irq_enable();
 	}
-	local_irq_enable();
 }
@@ -335,3 +342,61 @@
 		f(data);
 	}
 }
+
+static int ksoftirqd(void * __bind_cpu)
+{
+	int bind_cpu = *(int *) __bind_cpu;
+	int cpu = cpu_logical_map(bind_cpu);
+
+	daemonize();
+	current->nice = 19;
+	sigfillset(&current->blocked);
+
+	/* Migrate to the right CPU */
+	current->cpus_allowed = 1UL << cpu;
+	while (smp_processor_id() != cpu)
+		schedule();
+
+	sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
+
+	__set_current_state(TASK_INTERRUPTIBLE);
+	mb();
+
+	ksoftirqd_task(cpu) = current;
+
+	for (;;) {
+		if (!softirq_pending(cpu))
+			schedule();
+
+		__set_current_state(TASK_RUNNING);
+
+		while (softirq_pending(cpu)) {
+			do_softirq();
+			if (current->need_resched)
+				schedule();
+		}
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+	}
+}
+
+static __init int spawn_ksoftirqd(void)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+		if (kernel_thread(ksoftirqd, (void *) &cpu,
+				  CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
+			printk("spawn_ksoftirqd() failed for cpu %d\n", cpu);
+		else {
+			while (!ksoftirqd_task(cpu_logical_map(cpu))) {
+				current->policy |= SCHED_YIELD;
+				schedule();
+			}
+		}
+	}
+
+	return 0;
+}
+
+__initcall(spawn_ksoftirqd);