author:    Paul Gortmaker <paul.gortmaker@windriver.com>  2017-10-11 08:47:36 -0400
committer: Paul Gortmaker <paul.gortmaker@windriver.com>  2017-10-11 08:47:36 -0400
commit:    7d14fc7e925afb75ee3b208466f39ae477d5ee8e (patch)
tree:      ec67b9516e0b666ba3e612a6c26a3e58a798b949
parent:    907d0ac982c45d4a5a287d22ab6297370d43a660 (diff)
download:  4.12-rt-patches-7d14fc7e925afb75ee3b208466f39ae477d5ee8e.tar.gz
sched/tsc: replace my fix with two mainline commits (rt-v4.12)
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
4 files changed, 279 insertions, 86 deletions
diff --git a/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch b/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch
new file mode 100644
index 00000000000000..95d17edcf2fb50
--- /dev/null
+++ b/patches/sched-clock-Initialize-all-per-CPU-state-before-swit.patch
@@ -0,0 +1,122 @@
+From cf15ca8deda86b27b66e27848b4b0fe58098fc0b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 21 Apr 2017 12:11:53 +0200
+Subject: [PATCH] sched/clock: Initialize all per-CPU state before switching
+ (back) to unstable
+
+commit cf15ca8deda86b27b66e27848b4b0fe58098fc0b upstream.
+
+In preparation for not keeping the sched_clock_tick() active for
+stable TSC, we need to explicitly initialize all per-CPU state
+before switching back to unstable.
+
+Note: this patch looses the __gtod_offset calculation; it will be
+restored in the next one.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
+index 00a45c45beca..dc650851935f 100644
+--- a/kernel/sched/clock.c
++++ b/kernel/sched/clock.c
+@@ -124,6 +124,12 @@ int sched_clock_stable(void)
+ 	return static_branch_likely(&__sched_clock_stable);
+ }
+ 
++static void __scd_stamp(struct sched_clock_data *scd)
++{
++	scd->tick_gtod = ktime_get_ns();
++	scd->tick_raw = sched_clock();
++}
++
+ static void __set_sched_clock_stable(void)
+ {
+ 	struct sched_clock_data *scd = this_scd();
+@@ -141,8 +147,37 @@ static void __set_sched_clock_stable(void)
+ 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
+ }
+ 
++/*
++ * If we ever get here, we're screwed, because we found out -- typically after
++ * the fact -- that TSC wasn't good. This means all our clocksources (including
++ * ktime) could have reported wrong values.
++ *
++ * What we do here is an attempt to fix up and continue sort of where we left
++ * off in a coherent manner.
++ *
++ * The only way to fully avoid random clock jumps is to boot with:
++ * "tsc=unstable".
++ */
+ static void __sched_clock_work(struct work_struct *work)
+ {
++	struct sched_clock_data *scd;
++	int cpu;
++
++	/* take a current timestamp and set 'now' */
++	preempt_disable();
++	scd = this_scd();
++	__scd_stamp(scd);
++	scd->clock = scd->tick_gtod + __gtod_offset;
++	preempt_enable();
++
++	/* clone to all CPUs */
++	for_each_possible_cpu(cpu)
++		per_cpu(sched_clock_data, cpu) = *scd;
++
++	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
++			scd->tick_gtod, __gtod_offset,
++			scd->tick_raw,  __sched_clock_offset);
++
+ 	static_branch_disable(&__sched_clock_stable);
+ }
+ 
+@@ -150,27 +185,11 @@ static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+ 
+ static void __clear_sched_clock_stable(void)
+ {
+-	struct sched_clock_data *scd = this_scd();
+-
+-	/*
+-	 * Attempt to make the stable->unstable transition continuous.
+-	 *
+-	 * Trouble is, this is typically called from the TSC watchdog
+-	 * timer, which is late per definition. This means the tick
+-	 * values can already be screwy.
+-	 *
+-	 * Still do what we can.
+-	 */
+-	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
+-
+-	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+-			scd->tick_gtod, __gtod_offset,
+-			scd->tick_raw,  __sched_clock_offset);
++	if (!sched_clock_stable())
++		return;
+ 
+ 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
+-
+-	if (sched_clock_stable())
+-		schedule_work(&sched_clock_work);
++	schedule_work(&sched_clock_work);
+ }
+ 
+ void clear_sched_clock_stable(void)
+@@ -357,8 +376,7 @@ void sched_clock_tick(void)
+ 	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+ 	 */
+ 	scd = this_scd();
+-	scd->tick_raw = sched_clock();
+-	scd->tick_gtod = ktime_get_ns();
++	__scd_stamp(scd);
+ 
+ 	if (!sched_clock_stable() && likely(sched_clock_running))
+ 		sched_clock_local(scd);
+--
+2.1.4
+
diff --git a/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch b/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch
deleted file mode 100644
index 5d27cb185d4ef6..00000000000000
--- a/patches/sched-clock-fix-early-boot-splat-on-clock-transition.patch
+++ /dev/null
@@ -1,85 +0,0 @@
-From 9383285e87322b696eb48309a5ef29d421b84bad Mon Sep 17 00:00:00 2001
-From: Paul Gortmaker <paul.gortmaker@windriver.com>
-Date: Mon, 2 Oct 2017 21:59:48 -0400
-Subject: [PATCH rt-v4.11] sched/clock: fix early boot splat on clock transition to
- unstable
-
-On an older machine with a Pentium(R) Dual-Core E5300 I see the
-the following early (see time stamps) boot splat on clock transition
-due to TSC unstable (indicated in the last line):
-
- [    2.487904] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
- [    2.487909] caller is debug_smp_processor_id+0x17/0x20
- [    2.487911] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.12-rt14-00451
- [    2.487911] Hardware name: Dell Inc. OptiPlex 760 /0M858N, BIOS A16 08/06/2013
- [    2.487912] Call Trace:
- [    2.487918]  dump_stack+0x4f/0x6a
- [    2.487919]  check_preemption_disabled+0xda/0xe0
- [    2.487921]  debug_smp_processor_id+0x17/0x20
- [    2.487924]  clear_sched_clock_stable+0x28/0x80
- [    2.487927]  mark_tsc_unstable+0x22/0x70
- [    2.487930]  acpi_processor_get_power_info+0x3e3/0x6a0
- [    2.487932]  acpi_processor_power_init+0x3a/0x1d0
- [    2.487933]  __acpi_processor_start+0x162/0x1b0
- ....
- [    2.487950]  acpi_processor_driver_init+0x20/0x96
- [    2.487951]  do_one_initcall+0x3f/0x170
- [    2.487954]  kernel_init_freeable+0x18e/0x216
- [    2.487955]  ? rest_init+0xd0/0xd0
- [    2.487956]  kernel_init+0x9/0x100
- [    2.487958]  ret_from_fork+0x22/0x30
- [    2.487960] sched_clock: Marking unstable (2488005383, -223143)<-(2590866395, -103084155)
- [    2.488004] tsc: Marking TSC unstable due to TSC halts in idle
-
-(gdb) list *clear_sched_clock_stable+0x28
-0xffffffff8108bbb8 is in clear_sched_clock_stable (kernel/sched/clock.c:114).
-
-[...]
-
-112	static inline struct sched_clock_data *this_scd(void)
-113	{
-114		return this_cpu_ptr(&sched_clock_data);
-115	}
-
-We now get this_scd with preemption disabled. I also decided to pass
-in the scd to __clear_sched_clock_stable in the hope it made it more
-clear that the caller (currently only one) needs to get this_scd with
-preemption disabled, even though that wasn't strictly required.
-
-Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-
-diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
-index 11ad4bd995e2..32dcda23c616 100644
---- a/kernel/sched/clock.c
-+++ b/kernel/sched/clock.c
-@@ -155,9 +155,8 @@ static void __sched_clock_work(struct work_struct *work)
- 
- static DECLARE_WORK(sched_clock_work, __sched_clock_work);
- 
--static void __clear_sched_clock_stable(void)
-+static void __clear_sched_clock_stable(struct sched_clock_data *scd)
- {
--	struct sched_clock_data *scd = this_scd();
- 
- 	/*
- 	 * Attempt to make the stable->unstable transition continuous.
-@@ -186,8 +185,14 @@ void clear_sched_clock_stable(void)
- 
- 	smp_mb(); /* matches sched_clock_init_late() */
- 
--	if (sched_clock_running == 2)
--		__clear_sched_clock_stable();
-+	if (sched_clock_running == 2) {
-+		struct sched_clock_data *scd;
-+
-+		preempt_disable();
-+		scd = this_scd();
-+		preempt_enable();
-+		__clear_sched_clock_stable(scd);
-+	}
- }
- 
- void sched_clock_init_late(void)
---
-2.1.4
-
diff --git a/patches/series b/patches/series
index 01d15e9288316b..3d67fcd7c8afa9 100644
--- a/patches/series
+++ b/patches/series
@@ -19,6 +19,8 @@ lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch
 smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch
 
 # a few patches from tip's sched/core
+sched-clock-Initialize-all-per-CPU-state-before-swit.patch
+x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
 0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch
 0001-init-Pin-init-task-to-the-boot-CPU-initially.patch
 0002-arm-Adjust-system_state-check.patch
@@ -229,7 +231,6 @@ posix-timers-no-broadcast.patch
 signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
 
 # SCHED
-sched-clock-fix-early-boot-splat-on-clock-transition.patch
 
 # GENERIC CMPXCHG
 
diff --git a/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch b/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
new file mode 100644
index 00000000000000..6db62adb53399d
--- /dev/null
+++ b/patches/x86-tsc-sched-clock-clocksource-Use-clocksource-watc.patch
@@ -0,0 +1,155 @@
+From b421b22b00b0011f6a2ce3561176c4e79e640c49 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 21 Apr 2017 12:14:13 +0200
+Subject: [PATCH] x86/tsc, sched/clock, clocksource: Use clocksource watchdog
+ to provide stable sync points
+
+commit b421b22b00b0011f6a2ce3561176c4e79e640c49 upstream.
+
+Currently we keep sched_clock_tick() active for stable TSC in order to
+keep the per-CPU state semi up-to-date. The (obvious) problem is that
+by the time we detect TSC is borked, our per-CPU state is also borked.
+
+So hook into the clocksource watchdog and call a method after we've
+found it to still be stable.
+
+There's the obvious race where the TSC goes wonky between finding it
+stable and us running the callback, but closing that is too much work
+and not really worth it, since we're already detecting TSC wobbles
+after the fact, so we cannot, per definition, fully avoid funny clock
+values.
+
+And since the watchdog runs less often than the tick, this is also an
+optimization.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+
+diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
+index 66015195bd18..c1b16b328abe 100644
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -1033,6 +1033,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
+ 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
+ }
+ 
++static void tsc_cs_tick_stable(struct clocksource *cs)
++{
++	if (tsc_unstable)
++		return;
++
++	if (using_native_sched_clock())
++		sched_clock_tick_stable();
++}
++
+ /*
+  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
+  */
+@@ -1046,6 +1055,7 @@ static struct clocksource clocksource_tsc = {
+ 	.archdata               = { .vclock_mode = VCLOCK_TSC },
+ 	.resume			= tsc_resume,
+ 	.mark_unstable		= tsc_cs_mark_unstable,
++	.tick_stable		= tsc_cs_tick_stable,
+ };
+ 
+ void mark_tsc_unstable(char *reason)
+diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
+index f2b10d9ebd04..81490456c242 100644
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -96,6 +96,7 @@ struct clocksource {
+ 	void (*suspend)(struct clocksource *cs);
+ 	void (*resume)(struct clocksource *cs);
+ 	void (*mark_unstable)(struct clocksource *cs);
++	void (*tick_stable)(struct clocksource *cs);
+ 
+ 	/* private: */
+ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
+diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
+index 34fe92ce1ebd..978cbb0af5f3 100644
+--- a/include/linux/sched/clock.h
++++ b/include/linux/sched/clock.h
+@@ -63,8 +63,8 @@ extern void clear_sched_clock_stable(void);
+  */
+ extern u64 __sched_clock_offset;
+ 
+-
+ extern void sched_clock_tick(void);
++extern void sched_clock_tick_stable(void);
+ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+ 
+diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
+index dc650851935f..f861637f7fdc 100644
+--- a/kernel/sched/clock.c
++++ b/kernel/sched/clock.c
+@@ -366,20 +366,38 @@ void sched_clock_tick(void)
+ {
+ 	struct sched_clock_data *scd;
+ 
++	if (sched_clock_stable())
++		return;
++
++	if (unlikely(!sched_clock_running))
++		return;
++
+ 	WARN_ON_ONCE(!irqs_disabled());
+ 
+-	/*
+-	 * Update these values even if sched_clock_stable(), because it can
+-	 * become unstable at any point in time at which point we need some
+-	 * values to fall back on.
+-	 *
+-	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+-	 */
+ 	scd = this_scd();
+ 	__scd_stamp(scd);
++	sched_clock_local(scd);
++}
++
++void sched_clock_tick_stable(void)
++{
++	u64 gtod, clock;
+ 
+-	if (!sched_clock_stable() && likely(sched_clock_running))
+-		sched_clock_local(scd);
++	if (!sched_clock_stable())
++		return;
++
++	/*
++	 * Called under watchdog_lock.
++	 *
++	 * The watchdog just found this TSC to (still) be stable, so now is a
++	 * good moment to update our __gtod_offset. Because once we find the
++	 * TSC to be unstable, any computation will be computing crap.
++	 */
++	local_irq_disable();
++	gtod = ktime_get_ns();
++	clock = sched_clock();
++	__gtod_offset = (clock + __sched_clock_offset) - gtod;
++	local_irq_enable();
+ }
+ 
+ /*
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 93621ae718d3..03918a19cf2d 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -233,6 +233,9 @@ static void clocksource_watchdog(unsigned long data)
+ 			continue;
+ 		}
+ 
++		if (cs == curr_clocksource && cs->tick_stable)
++			cs->tick_stable(cs);
++
+ 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
+ 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
+ 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+--
+2.1.4
+
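The net effect of the two mainline commits is easier to see side by side: the clocksource watchdog, not the scheduler tick, now refreshes the stable-clock state, and __gtod_offset is the bridge that lets an unstable fall-back continue from coherent values. Below is a minimal, self-contained user-space sketch of that mechanism, under the assumption that clock_gettime(CLOCK_MONOTONIC_RAW) stands in for the kernel's sched_clock() and CLOCK_MONOTONIC for ktime_get_ns(); the kernel-looking names are mirrored only for readability, the per-CPU cloning, preemption and irq handling are omitted, and nothing here is kernel API.

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t ns(clockid_t id)
{
	struct timespec ts;

	clock_gettime(id, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Stand-ins for the kernel's raw and gtod clocks. */
static uint64_t sched_clock(void) { return ns(CLOCK_MONOTONIC_RAW); }
static uint64_t ktime_ns(void)    { return ns(CLOCK_MONOTONIC); }

static uint64_t __sched_clock_offset = 123456789;	/* arbitrary boot-time offset */
static uint64_t __gtod_offset;

/* Core of sched_clock_tick_stable(): refresh the raw<->gtod bridge. */
static void sched_clock_tick_stable(void)
{
	uint64_t gtod  = ktime_ns();
	uint64_t clock = sched_clock();

	/* u64 arithmetic wraps, so a "negative" offset is still correct. */
	__gtod_offset = (clock + __sched_clock_offset) - gtod;
}

/* Optional hook, as the second patch adds to struct clocksource. */
struct clocksource {
	const char *name;
	void (*tick_stable)(struct clocksource *cs);
};

static void tsc_cs_tick_stable(struct clocksource *cs)
{
	(void)cs;
	sched_clock_tick_stable();
}

static struct clocksource clocksource_tsc = {
	.name		= "tsc",
	.tick_stable	= tsc_cs_tick_stable,
};

static struct clocksource *curr_clocksource = &clocksource_tsc;

/* The guarded call the watchdog makes once a clocksource checks out stable. */
static void clocksource_watchdog(struct clocksource *cs)
{
	if (cs == curr_clocksource && cs->tick_stable)
		cs->tick_stable(cs);
}

int main(void)
{
	clocksource_watchdog(&clocksource_tsc);

	/*
	 * After a stable->unstable switch, gtod + __gtod_offset lands in the
	 * sched_clock() domain, which is what __sched_clock_work() relies on
	 * when it stamps scd->clock = tick_gtod + __gtod_offset.
	 */
	printf("sched_clock domain: %llu ns\n",
	       (unsigned long long)(ktime_ns() + __gtod_offset));
	return 0;
}

The design point worth noting is the direction of the guard: the hook is invoked only for the clocksource currently in use and only while the watchdog still trusts it, so by construction __gtod_offset was computed at most one watchdog period before any stable->unstable transition.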