From 7557407b1db8e39dbba02c5729869de302598e06 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Aug 2009 15:51:05 +0200
Subject: [PATCH] perf_events: defer poll() wakeups to softirq on RT

commit 0a764c16d61b4d5ec9a9fadae931c79484a274cd in tip, also see
merge artefact 2e5f15f4c51efd5fdb1ea97380c38d3f8c32d3e9 in tip.

Normally, pending work is work that cannot be done from NMI context, such
as wakeups and disabling the counter. The pending work is kept on a singly
linked list maintained with atomic ops so that it can be used from NMI
context.
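
For illustration, a minimal user-space sketch of that push operation,
using C11 atomics in place of the kernel's cmpxchg() and a single global
head rather than the per-CPU ones; the names are simplified stand-ins,
not the kernel's:

  /* NMI-safe push onto a pending list, sketched with C11 atomics in
   * place of the kernel's cmpxchg(); a single global head stands in
   * for the per-CPU perf_pending_head. */
  #include <stdatomic.h>
  #include <stddef.h>

  #define PENDING_TAIL ((struct pending_entry *)-1UL)

  struct pending_entry {
      struct pending_entry *_Atomic next;
      void (*func)(struct pending_entry *);
  };

  static struct pending_entry *_Atomic pending_head = PENDING_TAIL;

  static void pending_queue(struct pending_entry *entry,
                            void (*func)(struct pending_entry *))
  {
      struct pending_entry *expected = NULL;

      /* Claim the entry: only the first enqueue succeeds, so raising
       * the same event again before it is processed is a no-op. */
      if (!atomic_compare_exchange_strong(&entry->next, &expected,
                                          PENDING_TAIL))
          return;

      entry->func = func;

      /* Lock-free push onto the list head; no locks, no blocking,
       * hence usable from NMI context. */
      do {
          expected = atomic_load(&pending_head);
          atomic_store(&entry->next, expected);
      } while (!atomic_compare_exchange_weak(&pending_head, &expected,
                                             entry));
  }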

Normally this is called from IRQ context through the use of a self-IPI
(x86) or upon enabling hard interrupts (powerpc). Architectures that
neither implement set_perf_event_pending() nor call
perf_event_do_pending() upon leaving NMI context will get a polling
fallback from the timer softirq.

However, in -rt we cannot do the wakeup from IRQ context because it is a
wait_queue wakeup, which can be O(n), so defer all wakeups to the softirq
fallback by creating a second pending list that is only processed from
there.
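
Continuing the sketch above, the drain side, roughly what the softirq
fallback does for each pending list, detaches the whole list with one
atomic swap and runs the queued callbacks; again the C11 atomics and
names are illustrative only:

  /* Drain side: detach everything queued so far in one atomic swap,
   * then run each callback. New work can keep arriving on the
   * emptied list in the meantime. */
  static int pending_run(struct pending_entry *_Atomic *head)
  {
      struct pending_entry *list;
      int nr = 0;

      list = atomic_exchange(head, PENDING_TAIL);
      while (list != PENDING_TAIL) {
          struct pending_entry *entry = list;
          void (*func)(struct pending_entry *) = entry->func;

          list = atomic_load(&entry->next);

          /* Unqueue before calling func, e.g. the deferred poll()
           * wakeup, so the entry can be queued again from there. */
          atomic_store(&entry->next, NULL);
          func(entry);
          nr++;
      }

      return nr;
  }

In the hunks below, perf_output_wakeup() on -rt pushes onto a second
per-CPU head, and the timer softirq drains it through the new
perf_event_do_pending_softirq().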

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
LKML-Reference: <1251208265.7538.1157.camel@twins>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/perf_event.h |    5 +++
 kernel/perf_event.c        |   59 +++++++++++++++++++++++++++++++++++--------
 kernel/timer.c             |    1 +
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8e3754..130bd40 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -654,6 +654,9 @@ struct perf_event {
 	int				pending_kill;
 	int				pending_disable;
 	struct perf_pending_entry	pending;
+#ifdef CONFIG_PREEMPT_RT
+	struct perf_pending_entry	pending_softirq;
+#endif
 
 	atomic_t			event_limit;
 
@@ -762,6 +765,7 @@ extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void set_perf_event_pending(void);
 extern void perf_event_do_pending(void);
+extern void perf_event_do_pending_softirq(void);
 extern void perf_event_print_debug(void);
 extern void __perf_disable(void);
 extern bool __perf_enable(void);
@@ -959,6 +963,7 @@ static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_do_pending(void)				{ }
+static inline void perf_event_do_pending_softirq(void)		{ }
 static inline void perf_event_print_debug(void)				{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3d1552d..dd73870 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2687,11 +2687,26 @@ static void perf_pending_event(struct perf_pending_entry *entry)
 		__perf_event_disable(event);
 	}
 
+#ifndef CONFIG_PREEMPT_RT
 	if (event->pending_wakeup) {
 		event->pending_wakeup = 0;
 		perf_event_wakeup(event);
 	}
+#endif
+}
+
+#ifdef CONFIG_PREEMPT_RT
+static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
+{
+	struct perf_event *counter = container_of(entry,
+			struct perf_event, pending_softirq);
+
+	if (counter->pending_wakeup) {
+		counter->pending_wakeup = 0;
+		perf_event_wakeup(counter);
+	}
 }
+#endif
 
 #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
@@ -2699,33 +2714,42 @@ static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
 	PENDING_TAIL,
 };
 
-static void perf_pending_queue(struct perf_pending_entry *entry,
-			       void (*func)(struct perf_pending_entry *))
-{
-	struct perf_pending_entry **head;
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
+	PENDING_TAIL,
+};
 
+static void __perf_pending_queue(struct perf_pending_entry **head,
+				 struct perf_pending_entry *entry,
+			         void (*func)(struct perf_pending_entry *))
+{
 	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
 		return;
 
 	entry->func = func;
 
-	head = &get_cpu_var(perf_pending_head);
-
 	do {
 		entry->next = *head;
 	} while (cmpxchg(head, entry->next, entry) != entry->next);
+}
 
-	set_perf_event_pending();
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
+{
+	struct perf_pending_entry **head;
 
+	head = &get_cpu_var(perf_pending_head);
+	__perf_pending_queue(head, entry, func);
 	put_cpu_var(perf_pending_head);
+
+	set_perf_event_pending();
 }
 
-static int __perf_pending_run(void)
+static int __perf_pending_run(struct perf_pending_entry **head)
 {
 	struct perf_pending_entry *list;
 	int nr = 0;
 
-	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+	list = xchg(head, PENDING_TAIL);
 	while (list != PENDING_TAIL) {
 		void (*func)(struct perf_pending_entry *);
 		struct perf_pending_entry *entry = list;
@@ -2755,7 +2779,8 @@ static inline int perf_not_pending(struct perf_event *event)
 	 * need to wait.
 	 */
 	get_cpu();
-	__perf_pending_run();
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+	__perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
 	put_cpu();
 
 	/*
@@ -2773,7 +2798,13 @@ static void perf_pending_sync(struct perf_event *event)
 
 void perf_event_do_pending(void)
 {
-	__perf_pending_run();
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+}
+
+void perf_event_do_pending_softirq(void)
+{
+	__perf_pending_run(&__get_cpu_var(perf_pending_head));
+	__perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
 }
 
 /*
@@ -2817,12 +2848,18 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 {
 	atomic_set(&handle->data->poll, POLL_IN);
 
+#ifndef CONFIG_PREEMPT_RT
 	if (handle->nmi) {
 		handle->event->pending_wakeup = 1;
 		perf_pending_queue(&handle->event->pending,
 				   perf_pending_event);
 	} else
 		perf_event_wakeup(handle->event);
+#else
+	__perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
+			&handle->event->pending_softirq,
+			perf_pending_counter_softirq);
+#endif
 }
 
 /*
diff --git a/kernel/timer.c b/kernel/timer.c
index 7fcbfd7..e107f35 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1280,6 +1280,7 @@ static void run_timer_softirq(struct softirq_action *h)
 
 	printk_tick();
 	hrtimer_run_pending();
+	perf_event_do_pending_softirq();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
 		__run_timers(base);
-- 
1.7.0.4