summaryrefslogtreecommitdiffstats
path: root/rtmutex-prevent-missed-wakeups.patch
blob: 66cd1d50c2f0f353be3d7d0ee7a09fdcd4b864ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
From 3979534e254ae61ee6794ede903d50dcaa288d10 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:24 -0500
Subject: [PATCH] rtmutex: prevent missed wakeups

commit 487ac70841b3165e207442b071dc943147612de6 in tip.

The sleeping locks implementation based on rtmutexes can miss wakeups
for two reasons:

1) The unconditional usage TASK_UNINTERRUPTIBLE for the blocking state

   Results in missed wakeups from wake_up_interruptible*()

   state = TASK_INTERRUPTIBLE;
   blocks_on_lock()
     state = TASK_UNINTERRUPTIBLE;
     schedule();
     ....
     acquires_lock();
     restore_state();

   Until the waiter has restored its state wake_up_interruptible*() will
   fail.

2) The rtmutex wakeup intermediate state TASK_RUNNING_MUTEX

   Results in missed wakeups from wake_up*()

   waiter is woken by mutex wakeup
   	  waiter->state = TASK_RUNNING_MUTEX;
   ....
   acquires_lock();
   restore_state();

   Until the waiter has restored its state wake_up*() will fail.

Solution:

Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup
case we logically OR TASK_RUNNING_MUTEX to the current waiter
state. This keeps the original bits (TASK_INTERRUPTIBLE /
TASK_UNINTERRUPTIBLE) intact and lets wakeups succeed. When a task
blocks on a lock in state TASK_INTERRUPTIBLE and is woken up by a real
wakeup, then we store the state = TASK_RUNNING for the restore and can
safely use TASK_UNINTERRUPTIBLE from that point to avoid further
wakeups which just let us loop in the lock code.

This also removes the extra TASK_RUNNING_MUTEX flags from the
wakeup_process*() functions as they are not longer necessary.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/sched.h |    3 +--
 kernel/rtmutex.c      |   22 +++++++++++++++++++---
 kernel/sched.c        |   17 ++++++++++++-----
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1a14be1..42180c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -214,8 +214,7 @@ extern char ___assert_task_state[1 - 2*!!(
 
 /* Convenience macros for the sake of wake_up */
 #define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
-#define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED | \
-				 TASK_RUNNING_MUTEX)
+#define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
 
 /* get_task_state() */
 #define TASK_REPORT		(TASK_RUNNING | TASK_RUNNING_MUTEX | \
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a4c8b7e..900f11c 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -729,16 +729,32 @@ static int adaptive_wait(struct rt_mutex_waiter *waiter,
 /*
  * The state setting needs to preserve the original state and needs to
  * take care of non rtmutex wakeups.
+ *
+ * Called with rtmutex->wait_lock held to serialize against rtmutex
+ * wakeups().
  */
 static inline unsigned long
 rt_set_current_blocked_state(unsigned long saved_state)
 {
-	unsigned long state;
+	unsigned long state, block_state;
+
+	/*
+	 * If state is TASK_INTERRUPTIBLE, then we set the state for
+	 * blocking to TASK_INTERRUPTIBLE as well, otherwise we would
+	 * miss real wakeups via wake_up_interruptible(). If such a
+	 * wakeup happens we see the running state and preserve it in
+	 * saved_state. Now we can ignore further wakeups as we will
+	 * return in state running from our "spin" sleep.
+	 */
+	if (saved_state == TASK_INTERRUPTIBLE)
+		block_state = TASK_INTERRUPTIBLE;
+	else
+		block_state = TASK_UNINTERRUPTIBLE;
 
-	state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+	state = xchg(&current->state, block_state);
 	/*
 	 * Take care of non rtmutex wakeups. rtmutex wakeups
-	 * set the state to TASK_RUNNING_MUTEX.
+	 * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE.
 	 */
 	if (state == TASK_RUNNING)
 		saved_state = TASK_RUNNING;
diff --git a/kernel/sched.c b/kernel/sched.c
index e0f6fe4..094cb0c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2489,8 +2489,16 @@ out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, wake_flags);
 
+	/*
+	 * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task
+	 * state to preserve the original state, so a real wakeup
+	 * still can see the (UN)INTERRUPTIBLE bits in the state check
+	 * above. We dont have to worry about the | TASK_RUNNING_MUTEX
+	 * here. The waiter is serialized by the mutex lock and nobody
+	 * else can fiddle with p->state as we hold rq lock.
+	 */
 	if (mutex)
-		p->state = TASK_RUNNING_MUTEX;
+		p->state |= TASK_RUNNING_MUTEX;
 	else
 		p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -2552,7 +2560,7 @@ EXPORT_SYMBOL(wake_up_process_mutex_sync);
 
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
-	return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0);
+	return try_to_wake_up(p, state, 0, 0);
 }
 
 /*
@@ -3764,7 +3772,7 @@ need_resched_nonpreemptible:
 	update_rq_clock(rq);
 	clear_tsk_need_resched(prev);
 
-	if ((prev->state & ~TASK_RUNNING_MUTEX) &&
+	if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state &&
 	    !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
@@ -3966,8 +3974,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX,
-			      wake_flags, 0);
+	return try_to_wake_up(curr->private, mode, wake_flags, 0);
 }
 EXPORT_SYMBOL(default_wake_function);
 
-- 
1.7.0.4