From e830683c9f220fae2917c3cffe51350a17e4ff3c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 3 Jul 2009 08:29:31 -0500
Subject: [PATCH] softirqs: forced threading of softirqs

commit cecf393e7eca1950e4299f21c072617b8f7b568c in tip.

Split them into separate threads, one for each softirq.

[PG: original didn't need extra_flags back in 2.6.31, also add things
like BLOCK_IOPOLL_SOFTIRQ which appeared in 33rt]

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Paul Gortmaker
---
 include/linux/interrupt.h |    7 +-
 include/linux/sched.h     |   19 ++++
 kernel/Kconfig.preempt    |   15 +++
 kernel/sched.c            |   28 +++++-
 kernel/softirq.c          |  217 +++++++++++++++++++++++++++++++++++----------
 5 files changed, 233 insertions(+), 53 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 54c9394..f89e357 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -354,7 +354,6 @@ enum
     SCHED_SOFTIRQ,
     HRTIMER_SOFTIRQ,
     RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
-
     NR_SOFTIRQS
 };
 
@@ -372,11 +371,14 @@ struct softirq_action
 {
     void    (*action)(struct softirq_action *);
 };
 
+#define __raise_softirq_irqoff(nr) \
+        do { or_softirq_pending(1UL << (nr)); } while (0)
+#define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr)
+
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
-#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
@@ -524,6 +526,7 @@ extern void tasklet_kill(struct tasklet_struct *t);
 extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
 extern void tasklet_init(struct tasklet_struct *t,
              void (*func)(unsigned long), unsigned long data);
+extern void takeover_tasklets(unsigned int cpu);
 
 struct tasklet_hrtimer {
     struct hrtimer timer;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a993aa5..6a9d432 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -102,6 +102,12 @@ struct fs_struct;
 struct bts_context;
 struct perf_event_context;
 
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+extern int softirq_preemption;
+#else
+# define softirq_preemption 0
+#endif
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
@@ -1172,6 +1178,7 @@ struct task_struct {
     void *stack;
     atomic_t usage;
     unsigned int flags; /* per process flags, defined below */
+    unsigned int extra_flags;
     unsigned int ptrace;
 
     int lock_depth;     /* BKL lock depth */
@@ -1727,6 +1734,9 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_FREEZER_SKIP  0x40000000 /* Freezer should not count it as freezeable */
 #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
 
+/* Flags in the extra_flags field */
+#define PFE_SOFTIRQ 0x00000001 /* softirq context */
+
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
  * tasks can access tsk->flags in readonly mode for example
@@ -2379,6 +2389,8 @@ extern int __cond_resched_softirq(void);
     __cond_resched_softirq();               \
 })
 
+extern int cond_resched_softirq_context(void);
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@ -2408,6 +2420,13 @@ static inline void thread_group_cputime_free(struct signal_struct *sig)
 {
 }
 
+static inline int softirq_need_resched(void)
+{
+    if (softirq_preemption && (current->extra_flags & PFE_SOFTIRQ))
+        return need_resched();
+    return 0;
+}
+
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b9..2ff1834 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,3 +52,18 @@ config PREEMPT
 
 endchoice
 
+config PREEMPT_SOFTIRQS
+	bool "Thread Softirqs"
+	default n
+#	depends on PREEMPT
+	help
+	  This option reduces the latency of the kernel by 'threading'
+	  soft interrupts. This means that all softirqs will execute
+	  in the context of per-softirq kernel threads. While this
+	  helps latency, it can also reduce performance.
+
+	  The threading of softirqs can also be controlled via the
+	  /proc/sys/kernel/softirq_preemption runtime flag and the
+	  softirq-preempt=0/1 boot-time option.
+
+	  Say N if you are unsure.
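[Illustration only, not part of the patch: with CONFIG_PREEMPT_SOFTIRQS enabled, a long-running softirq handler can voluntarily give up the CPU between work items by calling the cond_resched_softirq_context() helper that the kernel/sched.c hunk below introduces. The handler and the my_handle_one_item() routine in this sketch are hypothetical; only cond_resched_softirq_context() and softirq_need_resched() come from this patch.]

/*
 * Hypothetical softirq action: drains a bounded batch of work and, when it
 * is running in a threaded softirq, offers to reschedule between items.
 */
static void my_softirq_action(struct softirq_action *h)
{
    int budget = 64;

    while (budget-- && my_handle_one_item()) {
        /*
         * No-op unless softirq threading is active and a higher
         * priority task needs the CPU (see softirq_need_resched()).
         */
        cond_resched_softirq_context();
    }
}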
diff --git a/kernel/sched.c b/kernel/sched.c
index 13dfb38..86aeeb6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3342,7 +3342,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
     tmp = cputime_to_cputime64(cputime);
     if (hardirq_count() - hardirq_offset)
         cpustat->irq = cputime64_add(cpustat->irq, tmp);
-    else if (softirq_count())
+    else if (softirq_count() || (p->extra_flags & PFE_SOFTIRQ))
         cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
     else
         cpustat->system = cputime64_add(cpustat->system, tmp);
@@ -3906,7 +3906,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
     int saved_lock_depth;
 
     /* Catch callers which need to be fixed */
-    BUG_ON(ti->preempt_count || !irqs_disabled());
+    WARN_ON_ONCE(ti->preempt_count || !irqs_disabled());
 
     do {
         add_preempt_count(PREEMPT_ACTIVE);
@@ -5053,9 +5053,12 @@ int __cond_resched_lock(spinlock_t *lock)
 }
 EXPORT_SYMBOL(__cond_resched_lock);
 
+/*
+ * Voluntarily preempt a process context that has softirqs disabled:
+ */
 int __sched __cond_resched_softirq(void)
 {
-    BUG_ON(!in_softirq());
+    WARN_ON_ONCE(!in_softirq());
 
     if (should_resched()) {
         local_bh_enable();
@@ -5067,6 +5070,25 @@ int __sched __cond_resched_softirq(void)
 }
 EXPORT_SYMBOL(__cond_resched_softirq);
 
+/*
+ * Voluntarily preempt a softirq context (possible with softirq threading):
+ */
+int __sched cond_resched_softirq_context(void)
+{
+    WARN_ON_ONCE(!in_softirq());
+
+    if (softirq_need_resched() && system_state == SYSTEM_RUNNING) {
+        raw_local_irq_disable();
+        _local_bh_enable();
+        raw_local_irq_enable();
+        __cond_resched();
+        local_bh_disable();
+        return 1;
+    }
+    return 0;
+}
+EXPORT_SYMBOL(cond_resched_softirq_context);
+
 /**
  * yield - yield the current processor to other threads.
  *
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 19ef218..eda9c66 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -8,9 +8,15 @@
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 * Remote softirq infrastructure is by Jens Axboe.
+ *
+ * Softirq-split implementation:
+ * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar
 */
 
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -54,7 +60,14 @@ EXPORT_SYMBOL(irq_stat);
 
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+struct softirqdata {
+    int nr;
+    unsigned long cpu;
+    struct task_struct *tsk;
+    int running;
+};
+
+static DEFINE_PER_CPU(struct softirqdata [NR_SOFTIRQS], ksoftirqd);
 
 char *softirq_to_name[NR_SOFTIRQS] = {
     "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -67,16 +80,32 @@ char *softirq_to_name[NR_SOFTIRQS] = {
 * to the pending events, so lets the scheduler to balance
 * the softirq load for us.
 */
-static void wakeup_softirqd(void)
+static void wakeup_softirqd(int softirq)
 {
     /* Interrupts are disabled: no need to stop preemption */
-    struct task_struct *tsk = __get_cpu_var(ksoftirqd);
+    struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk;
 
     if (tsk && tsk->state != TASK_RUNNING)
         wake_up_process(tsk);
 }
 
 /*
+ * Wake up the softirq threads which have work
+ */
+static void trigger_softirqs(void)
+{
+    u32 pending = local_softirq_pending();
+    int curr = 0;
+
+    while (pending) {
+        if (pending & 1)
+            wakeup_softirqd(curr);
+        pending >>= 1;
+        curr++;
+    }
+}
+
+/*
  * This one is for softirq.c-internal use,
  * where hardirqs are disabled legitimately:
  */
@@ -188,7 +217,7 @@ EXPORT_SYMBOL(local_bh_enable_ip);
 */
 #define MAX_SOFTIRQ_RESTART 10
 
-asmlinkage void __do_softirq(void)
+static void ___do_softirq(void)
 {
     struct softirq_action *h;
     __u32 pending;
@@ -198,9 +227,6 @@ asmlinkage void __do_softirq(void)
     pending = local_softirq_pending();
     account_system_vtime(current);
 
-    __local_bh_disable((unsigned long)__builtin_return_address(0));
-    lockdep_softirq_enter();
-
     cpu = smp_processor_id();
 restart:
     /* Reset the pending bitmask before enabling irqs */
@@ -228,6 +254,7 @@ restart:
             }
 
             rcu_bh_qs(cpu);
+            cond_resched_softirq_context();
         }
         h++;
         pending >>= 1;
@@ -240,12 +267,34 @@ restart:
         goto restart;
 
     if (pending)
-        wakeup_softirqd();
+        trigger_softirqs();
+}
+
+asmlinkage void __do_softirq(void)
+{
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+    /*
+     * 'preempt harder'. Push all softirq processing off to ksoftirqd.
+     */
+    if (softirq_preemption) {
+        if (local_softirq_pending())
+            trigger_softirqs();
+        return;
+    }
+#endif
+    /*
+     * 'immediate' softirq execution:
+     */
+    __local_bh_disable((unsigned long)__builtin_return_address(0));
+    lockdep_softirq_enter();
+
+    ___do_softirq();
 
     lockdep_softirq_exit();
     account_system_vtime(current);
     _local_bh_enable();
+
 }
 
 #ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -316,19 +365,11 @@ void irq_exit(void)
 */
 inline void raise_softirq_irqoff(unsigned int nr)
 {
-    __raise_softirq_irqoff(nr);
+    __do_raise_softirq_irqoff(nr);
 
-    /*
-     * If we're in an interrupt or softirq, we're done
-     * (this also catches softirq-disabled code). We will
-     * actually run the softirq once we return from
-     * the irq or softirq.
-     *
-     * Otherwise we wake up ksoftirqd to make sure we
-     * schedule the softirq soon.
-     */
-    if (!in_interrupt())
-        wakeup_softirqd();
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+    wakeup_softirqd(nr);
+#endif
 }
 
 void raise_softirq(unsigned int nr)
 {
@@ -426,7 +467,7 @@ static void tasklet_action(struct softirq_action *a)
             t->next = NULL;
             *__get_cpu_var(tasklet_vec).tail = t;
             __get_cpu_var(tasklet_vec).tail = &(t->next);
-            __raise_softirq_irqoff(TASKLET_SOFTIRQ);
+            __do_raise_softirq_irqoff(TASKLET_SOFTIRQ);
             local_irq_enable();
         }
     }
@@ -461,7 +502,7 @@ static void tasklet_hi_action(struct softirq_action *a)
             t->next = NULL;
             *__get_cpu_var(tasklet_hi_vec).tail = t;
             __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
-            __raise_softirq_irqoff(HI_SOFTIRQ);
+            __do_raise_softirq_irqoff(HI_SOFTIRQ);
             local_irq_enable();
         }
     }
@@ -692,33 +733,56 @@ void __init softirq_init(void)
     open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 }
 
-static int run_ksoftirqd(void * __bind_cpu)
+static int run_ksoftirqd(void * __data)
 {
+    /* Priority needs to be below hardirqs */
+    struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1};
+    struct softirqdata *data = __data;
+    u32 mask = (1 << data->nr);
+    struct softirq_action *h;
+
+    sys_sched_setscheduler(current->pid, SCHED_FIFO, &param);
+    current->extra_flags |= PFE_SOFTIRQ;
     set_current_state(TASK_INTERRUPTIBLE);
 
     while (!kthread_should_stop()) {
         preempt_disable();
-        if (!local_softirq_pending()) {
+        if (!(local_softirq_pending() & mask)) {
             preempt_enable_and_schedule();
             preempt_disable();
         }
 
         __set_current_state(TASK_RUNNING);
+        data->running = 1;
 
-        while (local_softirq_pending()) {
+        while (local_softirq_pending() & mask) {
             /* Preempt disable stops cpu going offline.
                If already offline, we'll be on wrong CPU:
                don't process */
-            if (cpu_is_offline((long)__bind_cpu))
+            if (cpu_is_offline(data->cpu))
                 goto wait_to_die;
-            do_softirq();
+
+            local_irq_disable();
             __preempt_enable_no_resched();
+            set_softirq_pending(local_softirq_pending() & ~mask);
+            local_bh_disable();
+            local_irq_enable();
+
+            h = &softirq_vec[data->nr];
+            if (h)
+                h->action(h);
+            rcu_bh_qs(data->cpu);
+
+            local_irq_disable();
+            _local_bh_enable();
+            local_irq_enable();
+
             cond_resched();
             preempt_disable();
-            rcu_sched_qs((long)__bind_cpu);
         }
         preempt_enable();
         set_current_state(TASK_INTERRUPTIBLE);
+        data->running = 0;
     }
     __set_current_state(TASK_RUNNING);
     return 0;
@@ -768,7 +832,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
     BUG();
 }
 
-static void takeover_tasklets(unsigned int cpu)
+void takeover_tasklets(unsigned int cpu)
 {
     /* CPU is dead, so no lock needed. */
     local_irq_disable();
 
@@ -794,49 +858,83 @@ static void takeover_tasklets(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static const char *softirq_names[] =
+{
+    [HI_SOFTIRQ]           = "high",
+    [SCHED_SOFTIRQ]        = "sched",
+    [TIMER_SOFTIRQ]        = "timer",
+    [NET_TX_SOFTIRQ]       = "net-tx",
+    [NET_RX_SOFTIRQ]       = "net-rx",
+    [BLOCK_SOFTIRQ]        = "block",
+    [BLOCK_IOPOLL_SOFTIRQ] = "block-iopoll",
+    [TASKLET_SOFTIRQ]      = "tasklet",
+#ifdef CONFIG_HIGH_RES_TIMERS
+    [HRTIMER_SOFTIRQ]      = "hrtimer",
+#endif
+    [RCU_SOFTIRQ]          = "rcu",
+};
+
 static int __cpuinit cpu_callback(struct notifier_block *nfb,
                   unsigned long action,
                   void *hcpu)
 {
-    int hotcpu = (unsigned long)hcpu;
+    int hotcpu = (unsigned long)hcpu, i;
     struct task_struct *p;
 
     switch (action) {
     case CPU_UP_PREPARE:
     case CPU_UP_PREPARE_FROZEN:
-        p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
-        if (IS_ERR(p)) {
-            printk("ksoftirqd for %i failed\n", hotcpu);
-            return NOTIFY_BAD;
+        for (i = 0; i < NR_SOFTIRQS; i++) {
+            per_cpu(ksoftirqd, hotcpu)[i].nr = i;
+            per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu;
+            per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL;
         }
-        kthread_bind(p, hotcpu);
-        per_cpu(ksoftirqd, hotcpu) = p;
-        break;
+        for (i = 0; i < NR_SOFTIRQS; i++) {
+            p = kthread_create(run_ksoftirqd,
+                       &per_cpu(ksoftirqd, hotcpu)[i],
+                       "softirq-%s/%d", softirq_names[i],
+                       hotcpu);
+            if (IS_ERR(p)) {
+                printk("ksoftirqd %d for %i failed\n", i,
+                       hotcpu);
+                return NOTIFY_BAD;
+            }
+            kthread_bind(p, hotcpu);
+            per_cpu(ksoftirqd, hotcpu)[i].tsk = p;
+        }
+        break;
     case CPU_ONLINE:
     case CPU_ONLINE_FROZEN:
-        wake_up_process(per_cpu(ksoftirqd, hotcpu));
+        for (i = 0; i < NR_SOFTIRQS; i++)
+            wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk);
         break;
 #ifdef CONFIG_HOTPLUG_CPU
     case CPU_UP_CANCELED:
     case CPU_UP_CANCELED_FROZEN:
-        if (!per_cpu(ksoftirqd, hotcpu))
-            break;
-        /* Unbind so it can run.  Fall thru. */
-        kthread_bind(per_cpu(ksoftirqd, hotcpu),
-                 cpumask_any(cpu_online_mask));
+#if 0
+        for (i = 0; i < NR_SOFTIRQS; i++) {
+            if (!per_cpu(ksoftirqd, hotcpu)[i].tsk)
+                continue;
+            kthread_bind(per_cpu(ksoftirqd, hotcpu)[i].tsk,
+                     cpumask_any(cpu_online_mask));
+        }
+#endif
     case CPU_DEAD:
     case CPU_DEAD_FROZEN: {
         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-        p = per_cpu(ksoftirqd, hotcpu);
-        per_cpu(ksoftirqd, hotcpu) = NULL;
-        sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-        kthread_stop(p);
+        for (i = 0; i < NR_SOFTIRQS; i++) {
+            p = per_cpu(ksoftirqd, hotcpu)[i].tsk;
+            per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL;
+            sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+            kthread_stop(p);
+        }
         takeover_tasklets(hotcpu);
         break;
     }
 #endif /* CONFIG_HOTPLUG_CPU */
-  }
+    }
     return NOTIFY_OK;
 }
@@ -856,6 +954,29 @@ static __init int spawn_ksoftirqd(void)
 }
 early_initcall(spawn_ksoftirqd);
 
+
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+
+int softirq_preemption = 1;
+
+EXPORT_SYMBOL(softirq_preemption);
+
+static int __init softirq_preempt_setup(char *str)
+{
+    if (!strncmp(str, "off", 3))
+        softirq_preemption = 0;
+    else
+        get_option(&str, &softirq_preemption);
+    if (!softirq_preemption)
+        printk("turning off softirq preemption!\n");
+
+    return 1;
+}
+
+__setup("softirq-preempt=", softirq_preempt_setup);
+
+#endif
+
 #ifdef CONFIG_SMP
 /*
  * Call a function on all processors
-- 
1.7.0.4
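[Reference sketch, not part of the patch: run_ksoftirqd() above marks its task with PFE_SOFTIRQ in current->extra_flags, and softirq_preemption reflects the CONFIG_PREEMPT_SOFTIRQS default or the softirq-preempt= boot option. Code that wants to know whether it is executing in one of the per-softirq threads rather than in a classic in-place softirq could combine the two; the helper name below is hypothetical.]

/*
 * Hypothetical helper: true when running in one of the softirq-<name>/<cpu>
 * threads created by this patch, false for classic in-place softirqs.
 */
static inline int in_threaded_softirq(void)
{
    return softirq_preemption && (current->extra_flags & PFE_SOFTIRQ);
}

[With threading active, each CPU gets one SCHED_FIFO thread per softirq (softirq-high/0, softirq-timer/0, ...); booting with softirq-preempt=off falls back to running softirqs in place.]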