aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolas Saenz Julienne <nsaenzju@redhat.com>2021-07-08 12:29:50 +0200
committerNicolas Saenz Julienne <nsaenz@kernel.org>2021-07-09 13:47:36 +0200
commit3f5a9e1b5ebd7f2b7bb3c4055d6016082a5f672f (patch)
treed7b3da6950c291fec2c7e830edf6cd512011c70e
parent95f3df43a99d32dc17938f245c27cb80466d9071 (diff)
downloadlinux-rpi-timers-fix.tar.gz
timers: Fix get_next_timer_interrupt() with no timers pendingtimers-fix
31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") subtly altered get_next_timer_interrupt()'s behaviour. The function no longer consistently returns KTIME_MAX with no timers pending. In order to decide if there are any timers pending we check whether the next expiry will happen NEXT_TIMER_MAX_DELTA jiffies from now. Unfortunately, the next expiry time and the timer base clock are no longer updated in unison. The former changes upon certain timer operations (enqueue, expire, detach), whereas the latter keeps track of jiffies as they move forward. Ultimately breaking the logic above. A simplified example: - Upon entering get_next_timer_interrupt() with: jiffies = 1 base->clk = 0; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry == base->clk + NEXT_TIMER_MAX_DELTA', the function returns KTIME_MAX. - 'base->clk' is updated to the jiffies value. - The next time we enter get_next_timer_interrupt(), taking into account no timer operations happened: base->clk = 1; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry != base->clk + NEXT_TIMER_MAX_DELTA', the function returns a valid expire time, which is incorrect. This, ultimately might unnecessarily rearm sched's timer on nohz_full setups, and add latency to the system[1]. So, introduce 'base->timers_pending'[2], update it every time 'base->next_expiry' changes, and use it in get_next_timer_interrupt(). [1] See tick_nohz_stop_tick(). [2] A quick check in x86_64 and arm64 with pahole shows it doesn't make 'struct timer_base' any bigger. Fixes: 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
-rw-r--r--kernel/time/timer.c8
1 files changed, 5 insertions, 3 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3fadb58fc9d7b1..9eb11c2209e56b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -207,6 +207,7 @@ struct timer_base {
unsigned int cpu;
bool next_expiry_recalc;
bool is_idle;
+ bool timers_pending;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
* can reevaluate the wheel:
*/
base->next_expiry = bucket_expiry;
+ base->timers_pending = true;
base->next_expiry_recalc = false;
trigger_dyntick_cpu(base, timer);
}
@@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
}
base->next_expiry_recalc = false;
+ base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
return next;
}
@@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
u64 expires = KTIME_MAX;
unsigned long nextevt;
- bool is_max_delta;
/*
* Pretend that there is no timer pending if the cpu is offline.
@@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
if (base->next_expiry_recalc)
base->next_expiry = __next_timer_interrupt(base);
nextevt = base->next_expiry;
- is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
/*
* We have a fresh next event. Check whether we can forward the
@@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
expires = basem;
base->is_idle = false;
} else {
- if (!is_max_delta)
+ if (base->timers_pending)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle.
@@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu)
base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies;
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->timers_pending = false;
base->is_idle = false;
}
return 0;