From 1ce09df80e40c1c17c2be8cec33c388857a42385 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Mon, 18 Oct 2010 22:53:08 +0100 Subject: [PATCH] tracing: Add combined timer and wakeup latency histogram commit cfaca74d233d07b0e29c134062a980d1e89ddd20 in tip. The real-time wakeup latency tracer (wakeup_rt) and the wakeup latency histogram (CONFIG_WAKEUP_LATENCY_HIST) monitor processes that have the highest priority of the system throughout their entire wakeup processing and do not share this priority with any other concurrently running or scheduled process. Thus, this tracer should be able to determine a system's worst-case latency. However, in a situation where interrupts are temporarily disabled, the timer interrupt to enqueue a process is already late, but the latency between enqueuing and wakeup still may be normal. In this case, an erroneously low latency is recorded. To overcome this limitation and to create a true recording of the apparent latency of every single wakeup procedure, an additional tracer was implemented. This tracer determines the offset between the intended timeout and the real timeout of the timer interrupts; the CONFIG_MISSED_TIMER_OFFSETS_HIST configuration item is used to enable this tracer. For the time being, however, these two tracers work independently; in consequence, the worst-case timer offset and the worst-case wakeup latency cannot simply be added to determine the overall scheduling latency, since the two worst-case values normally are not derived at the same time from the same process. This patch adds the additional latency histogram "timerandwakeup"; it is based on the existing wakeup latency histogram, except that the timer latency is added to the recorded latency value, if the process was enqueued and woken up as a result of an expired wakeup timer. The "timerandwakeup" latency histogram is configured implicitly, if both CONFIG_WAKEUP_LATENCY_HIST and CONFIG_MISSED_TIMER_OFFSETS_HIST are configured, but must be enabled separately at run-time. Signed-off-by: Carsten Emde Signed-off-by: Thomas Gleixner Signed-off-by: Paul Gortmaker --- Documentation/trace/histograms.txt | 36 +++++++++----- include/linux/sched.h | 3 + kernel/trace/Kconfig | 30 +++++++++++- kernel/trace/latency_hist.c | 92 ++++++++++++++++++++++++++++++++++-- 4 files changed, 142 insertions(+), 19 deletions(-) diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt index 6645057..0528201 100644 --- a/Documentation/trace/histograms.txt +++ b/Documentation/trace/histograms.txt @@ -36,14 +36,20 @@ Effective latencies are actually occuring during wakeup of a process. To determine effective latencies, the kernel stores the time stamp when a process is scheduled to be woken up, and determines the duration of the wakeup time shortly before control is passed over to this process. Note -that the apparent latency in user space may be considerably longer, -since -i) interrupts may be disabled preventing the scheduler from initiating -the wakeup mechanism, and -ii) the process may be interrupted after control is passed over to it -but before user space execution takes place. -- Configuration item (in the Kernel hacking/Tracers submenu) +that the apparent latency in user space may be somewhat longer, since the +process may be interrupted after control is passed over to it but before +the execution in user space takes place. Simply measuring the interval +between enqueuing and wakeup may also not appropriate in cases when a +process is scheduled as a result of a timer expiration. The timer may have +missed its deadline, e.g. due to disabled interrupts, but this latency +would not be registered. Therefore, the offsets of missed timers are +recorded in a separate histogram. If both wakeup latency and missed timer +offsets are configured and enabled, a third histogram may be enabled that +records the overall latency as a sum of the timer latency, if any, and the +wakeup latency. This histogram is called "timerandwakeup". +- Configuration items (in the Kernel hacking/Tracers submenu) CONFIG_WAKEUP_LATENCY + CONFIG_MISSED_TIMER_OFSETS * Usage @@ -59,30 +65,34 @@ from shell command line level, or add nodev /sys sysfs defaults 0 0 nodev /sys/kernel/debug debugfs defaults 0 0 -to the file /etc/fstab. All latency histogram related files are +to the file /etc/fstab. All latency histogram related files are then available in the directory /sys/kernel/debug/tracing/latency_hist. A particular histogram type is enabled by writing non-zero to the related variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. Select "preemptirqsoff" for the histograms of potential sources of -latencies and "wakeup" for histograms of effective latencies. The +latencies and "wakeup" for histograms of effective latencies etc. The histogram data - one per CPU - are available in the files /sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx /sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx /sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx -/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx The histograms are reset by writing non-zero to the file "reset" in a particular latency directory. To reset all latency data, use #!/bin/sh -HISTDIR=/sys/kernel/debug/tracing/latency_hist +TRACINGDIR=/sys/kernel/debug/tracing +HISTDIR=$TRACINGDIR/latency_hist if test -d $HISTDIR then cd $HISTDIR - for i in */reset + for i in `find . | grep /reset$` do echo 1 >$i done @@ -138,7 +148,7 @@ grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 To only collect wakeup latency data of a particular process, write the PID of the requested process to -/sys/kernel/debug/tracing/latency_hist/wakeup/pid. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid PIDs are not considered, if this variable is set to 0. diff --git a/include/linux/sched.h b/include/linux/sched.h index f52ece2..07756f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1592,6 +1592,9 @@ struct task_struct { unsigned long trace_recursion; #ifdef CONFIG_WAKEUP_LATENCY_HIST u64 preempt_timestamp_hist; +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + unsigned long timer_offset; +#endif #endif #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e966f40..62c8df8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -266,10 +266,24 @@ config WAKEUP_LATENCY_HIST /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio + If both Scheduling Latency Histogram and Missed Timer Offsets + Histogram are selected, additional histogram data will be collected + that contain, in addition to the wakeup latency, the timer latency, in + case the wakeup was triggered by an expired timer. These histograms + are available in the + + /sys/kernel/debug/tracing/latency_hist/timerandwakeup + + directory. They reflect the apparent interrupt and scheduling latency + and are best suitable to determine the worst-case latency of a given + system. To enable these histograms, write a non-zero number to + + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup + config MISSED_TIMER_OFFSETS_HIST depends on GENERIC_TIME select GENERIC_TRACER - bool "Missed timer offsets histogram" + bool "Missed Timer Offsets Histogram" help Generate a histogram of missed timer offsets in microseconds. The histograms are disabled by default. To enable them, write a non-zero @@ -281,6 +295,20 @@ config MISSED_TIMER_OFFSETS_HIST /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets + If both Scheduling Latency Histogram and Missed Timer Offsets + Histogram are selected, additional histogram data will be collected + that contain, in addition to the wakeup latency, the timer latency, in + case the wakeup was triggered by an expired timer. These histograms + are available in the + + /sys/kernel/debug/tracing/latency_hist/timerandwakeup + + directory. They reflect the apparent interrupt and scheduling latency + and are best suitable to determine the worst-case latency of a given + system. To enable these histograms, write a non-zero number to + + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup + config SYSPROF_TRACER bool "Sysprof Tracer" depends on X86 diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c index 7c26615..43dba1e 100644 --- a/kernel/trace/latency_hist.c +++ b/kernel/trace/latency_hist.c @@ -36,6 +36,7 @@ enum { WAKEUP_LATENCY, WAKEUP_LATENCY_SHAREDPRIO, MISSED_TIMER_OFFSETS, + TIMERANDWAKEUP_LATENCY, MAX_LATENCY_TYPE, }; @@ -134,6 +135,17 @@ static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); static unsigned long missed_timer_offsets_pid; #endif +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; +static struct enable_data timerandwakeup_enabled_data = { + .latency_type = TIMERANDWAKEUP_LATENCY, + .enabled = 0, +}; +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); +#endif + void notrace latency_hist(int latency_type, int cpu, unsigned long latency, struct task_struct *p) { @@ -179,15 +191,23 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency, mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); break; #endif +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) + case TIMERANDWAKEUP_LATENCY: + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); + break; +#endif + default: return; } + latency += my_hist->offset; + if (atomic_read(&my_hist->hist_mode) == 0) return; - latency += my_hist->offset; - if (latency < 0 || latency >= MAX_ENTRY_NUM) { if (latency < 0) my_hist->below_hist_bound_samples++; @@ -203,7 +223,8 @@ void notrace latency_hist(int latency_type, int cpu, unsigned long latency, defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) if (latency_type == WAKEUP_LATENCY || latency_type == WAKEUP_LATENCY_SHAREDPRIO || - latency_type == MISSED_TIMER_OFFSETS) { + latency_type == MISSED_TIMER_OFFSETS || + latency_type == TIMERANDWAKEUP_LATENCY) { strncpy(mp->comm, p->comm, sizeof(mp->comm)); mp->pid = task_pid_nr(p); mp->prio = p->prio; @@ -384,6 +405,13 @@ latency_hist_reset(struct file *file, const char __user *a, mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); break; #endif +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) + case TIMERANDWAKEUP_LATENCY: + hist = &per_cpu(timerandwakeup_latency_hist, cpu); + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); + break; +#endif } hist_reset(hist); @@ -391,7 +419,8 @@ latency_hist_reset(struct file *file, const char __user *a, defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) if (latency_type == WAKEUP_LATENCY || latency_type == WAKEUP_LATENCY_SHAREDPRIO || - latency_type == MISSED_TIMER_OFFSETS) { + latency_type == MISSED_TIMER_OFFSETS || + latency_type == TIMERANDWAKEUP_LATENCY) { mp->comm[0] = '\0'; mp->prio = mp->pid = mp->latency = -1; } @@ -567,6 +596,14 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) } break; #endif +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) + case TIMERANDWAKEUP_LATENCY: + if (!wakeup_latency_enabled_data.enabled || + !missed_timer_offsets_enabled_data.enabled) + return -EINVAL; + break; +#endif default: break; } @@ -615,12 +652,18 @@ do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) per_cpu(wakeup_sharedprio, cpu) = 0; } } +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + timerandwakeup_enabled_data.enabled = 0; +#endif break; #endif #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST case MISSED_TIMER_OFFSETS: unregister_trace_hrtimer_interrupt( probe_hrtimer_interrupt); +#ifdef CONFIG_WAKEUP_LATENCY_HIST + timerandwakeup_enabled_data.enabled = 0; +#endif break; #endif default: @@ -861,10 +904,20 @@ static notrace void probe_wakeup_latency_hist_stop(struct rq *rq, if (per_cpu(wakeup_sharedprio, cpu)) { latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, next); per_cpu(wakeup_sharedprio, cpu) = 0; - } else + } else { latency_hist(WAKEUP_LATENCY, cpu, latency, next); +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + if (timerandwakeup_enabled_data.enabled) { + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, + next->timer_offset + latency, next); + } +#endif + } out_reset: +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST + next->timer_offset = 0; +#endif put_task_struct(cpu_wakeup_task); per_cpu(wakeup_task, cpu) = NULL; out: @@ -888,6 +941,9 @@ static notrace void probe_hrtimer_interrupt(int cpu, long long latency_ns, latency = (unsigned long) div_s64(-latency_ns, 1000); latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, task); +#ifdef CONFIG_WAKEUP_LATENCY_HIST + task->timer_offset = latency; +#endif } } #endif @@ -1035,6 +1091,32 @@ static __init int latency_hist_init(void) enable_root, (void *)&missed_timer_offsets_enabled_data, &enable_fops); #endif + +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, + latency_hist_root); + for_each_possible_cpu(i) { + sprintf(name, cpufmt, i); + entry = debugfs_create_file(name, 0444, dentry, + &per_cpu(timerandwakeup_latency_hist, i), + &latency_hist_fops); + my_hist = &per_cpu(timerandwakeup_latency_hist, i); + atomic_set(&my_hist->hist_mode, 1); + my_hist->min_lat = 0xFFFFFFFFUL; + + sprintf(name, cpufmt_maxlatproc, i); + mp = &per_cpu(timerandwakeup_maxlatproc, i); + entry = debugfs_create_file(name, 0444, dentry, mp, + &maxlatproc_fops); + mp->prio = mp->pid = mp->latency = -1; + } + entry = debugfs_create_file("reset", 0644, dentry, + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); + entry = debugfs_create_file("timerandwakeup", 0644, + enable_root, (void *)&timerandwakeup_enabled_data, + &enable_fops); +#endif return 0; } -- 1.7.0.4