From 486ba29003d83098d1afeb27656fad748392c624 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:31 -0500 Subject: [PATCH] hrtimer: fixup hrtimer callback changes for preempt-rt commit 305d0a1ab668ee560de7fe5eed0b62cf419a50f9 in tip. In preempt-rt we cannot call the callbacks which take sleeping locks from the timer interrupt context. Bring back the softirq split for now, until we have fixed the signal delivery problem for real. [PG: restore cb_entry field; it is gone in 33+] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Paul Gortmaker --- include/linux/hrtimer.h | 3 + kernel/hrtimer.c | 144 +++++++++++++++++++++++++++++++++++++++++----- kernel/sched.c | 2 + kernel/time/tick-sched.c | 1 + 4 files changed, 135 insertions(+), 15 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 67945c3..1cc4577 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -107,6 +107,8 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; + struct list_head cb_entry; + int irqsafe; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -142,6 +144,7 @@ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; clockid_t index; struct rb_root active; + struct list_head expired; struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 93b2df2..f22611d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -563,15 +563,24 @@ static int hrtimer_reprogram(struct hrtimer *timer, WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); +#ifndef CONFIG_PREEMPT_RT /* * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or * the callback is executed in the hrtimer_interrupt context. 
The - * reprogramming is handled either by the softirq, which called the - * callback or at the end of the hrtimer_interrupt. + * reprogramming is handled at the end of the hrtimer_interrupt. */ if (hrtimer_callback_running(timer)) return 0; +#else + /* + * preempt-rt changes the rules here as long as we have not + * solved the callback problem. For softirq based timers we + * need to allow reprogramming. + */ + if (hrtimer_callback_running(timer) && timer->irqsafe) + return 0; +#endif /* * CLOCK_REALTIME timer might be requested with an absolute @@ -679,6 +688,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now); +static int hrtimer_rt_defer(struct hrtimer *timer); /* * When High resolution timers are active, try to reprogram. Note, that in case @@ -691,6 +702,19 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, int wakeup) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { +#ifdef CONFIG_PREEMPT_RT + /* + * Move softirq based timers away from the rbtree in + * case it expired already. Otherwise we would have a + * stale base->first entry until the softirq runs. + */ + if (!hrtimer_rt_defer(timer)) { + ktime_t now = ktime_get(); + + __run_hrtimer(timer, &now); + return 1; + } +#endif if (wakeup) { raw_spin_unlock(&base->cpu_base->lock); raise_softirq_irqoff(HRTIMER_SOFTIRQ); @@ -934,6 +958,11 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; + if (unlikely(!list_empty(&timer->cb_entry))) { + list_del_init(&timer->cb_entry); + goto out; + } + /* * Remove the timer from the rbtree and replace the first * entry pointer if necessary. 
@@ -1189,6 +1218,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; + INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS @@ -1268,6 +1298,77 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +#ifdef CONFIG_PREEMPT_RT + +/* + * The changes in mainline which removed the callback modes from + * hrtimer are not yet working with -rt. The non wakeup_process() + * based callbacks which involve sleeping locks need to be treated + * separately. + */ +static void hrtimer_rt_run_pending(void) +{ + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; + struct hrtimer *timer; + int index, restart; + + local_irq_disable(); + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); + + raw_spin_lock(&cpu_base->lock); + + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { + base = &cpu_base->clock_base[index]; + + while (!list_empty(&base->expired)) { + timer = list_first_entry(&base->expired, + struct hrtimer, cb_entry); + + /* + * Same as the above __run_hrtimer function, + * except that the callback runs with interrupts enabled. 
+ */ + debug_hrtimer_deactivate(timer); + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + fn = timer->function; + + raw_spin_unlock_irq(&cpu_base->lock); + restart = fn(timer); + raw_spin_lock_irq(&cpu_base->lock); + + if (restart != HRTIMER_NORESTART) { + BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); + enqueue_hrtimer(timer, base); + } + timer->state &= ~HRTIMER_STATE_CALLBACK; + } + } + + raw_spin_unlock_irq(&cpu_base->lock); + + wake_up_timer_waiters(cpu_base); +} + +static int hrtimer_rt_defer(struct hrtimer *timer) +{ + if (timer->irqsafe) + return 0; + + __remove_hrtimer(timer, timer->base, timer->state, 0); + list_add_tail(&timer->cb_entry, &timer->base->expired); + return 1; +} + +#else + +static inline void hrtimer_rt_run_pending(void) { } +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } + +#endif + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1279,7 +1380,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now, entry_time, delta; - int i, retries = 0; + int i, retries = 0, raise = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; @@ -1335,7 +1436,10 @@ retry: break; } - __run_hrtimer(timer, &basenow); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer, &basenow); + else + raise = 1; } base++; } @@ -1351,6 +1455,10 @@ retry: if (expires_next.tv64 == KTIME_MAX || !tick_program_event(expires_next, 0)) { cpu_base->hang_detected = 0; + + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + return; } @@ -1428,17 +1536,18 @@ void hrtimer_peek_ahead_timers(void) local_irq_restore(flags); } -static void run_hrtimer_softirq(struct softirq_action *h) -{ - hrtimer_peek_ahead_timers(); -} - #else /* CONFIG_HIGH_RES_TIMERS */ static inline void __hrtimer_peek_ahead_timers(void) { } #endif /* !CONFIG_HIGH_RES_TIMERS */ +static void run_hrtimer_softirq(struct 
softirq_action *h) +{ + hrtimer_peek_ahead_timers(); + hrtimer_rt_run_pending(); +} + /* * Called from timer softirq every jiffy, expire hrtimers: * @@ -1473,7 +1582,7 @@ void hrtimer_run_queues(void) struct rb_node *node; struct hrtimer_cpu_base *cpu_base; struct hrtimer_clock_base *base; - int index, gettime = 1; + int index, gettime = 1, raise = 0; cpu_base = &per_cpu(hrtimer_bases, raw_smp_processor_id()); if (hrtimer_hres_active(cpu_base)) @@ -1500,12 +1609,16 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer, &base->softirq_time); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer, &base->softirq_time); + else + raise = 1; } raw_spin_unlock(&cpu_base->lock); } - wake_up_timer_waiters(cpu_base); + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } /* @@ -1527,6 +1640,7 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; + sl->timer.irqsafe = 1; sl->task = task; } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); @@ -1662,8 +1776,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu) raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); + } hrtimer_init_hres(cpu_base); #ifdef CONFIG_PREEMPT_SOFTIRQS @@ -1781,9 +1897,7 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -#ifdef CONFIG_HIGH_RES_TIMERS open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -#endif } /** diff --git a/kernel/sched.c b/kernel/sched.c index 17cef67..00237e3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -203,6 +203,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) 
hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rt_b->rt_period_timer.irqsafe = 1; rt_b->rt_period_timer.function = sched_rt_period_timer; } @@ -1202,6 +1203,7 @@ static void init_rq_hrtick(struct rq *rq) hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; + rq->hrtick_timer.irqsafe = 1; } #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index bc625d9..a85776e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -733,6 +733,7 @@ void tick_setup_sched_timer(void) * Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + ts->sched_timer.irqsafe = 1; ts->sched_timer.function = tick_sched_timer; /* Get the next period (per cpu) */ -- 1.7.0.4