aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiroslav Benes <mbenes@suse.cz>2015-06-03 13:44:17 +0200
committerJiri Slaby <jslaby@suse.cz>2015-06-03 17:27:28 +0200
commit0df05c54d519523eeb0ded2651b55b2d0fb2b963 (patch)
treea361af01c501cda9cfcd4f56793f776dc6cbaf07
parent4c95a978db4e89772df0b220f43387ff784a4250 (diff)
downloadkgraft-0df05c54d519523eeb0ded2651b55b2d0fb2b963.tar.gz
kgr: send a fake signal to all blocking tasks
The kGraft consistency model is LEAVE_KERNEL and SWITCH_THREAD. This means that all tasks in the system have to be marked one by one as safe to call a new patched function. The safe place is on the boundary between kernel and userspace. The patching waits for all tasks to cross this boundary and finishes the process afterwards. The problem is that a task can block the finalization of the patching process for quite a long time, if not forever. The task could sleep somewhere in the kernel or could be running in userspace with no prospect of entering the kernel and thus going through the safe place. Luckily we can force the task to do that by sending it a fake signal, that is, a signal with no data in the signal pending structures (no handler, no sign of a proper signal delivered). Suspend/freezer use this to freeze tasks as well. The task gets TIF_SIGPENDING set and is woken up (if it has been sleeping in the kernel before) or kicked by a rescheduling IPI (if it was running on another CPU). This causes the task to go to the kernel/userspace boundary where the signal would be handled and the task would be marked as safe in terms of live patching. There are tasks which are not affected by this technique though. The fake signal is not sent to kthreads. They should be handled in a different way. Also, if the task is in the TASK_RUNNING state but not currently running on some CPU, it doesn't get the IPI, but it would eventually handle the signal anyway. Lastly, if the task runs in the kernel (in the TASK_RUNNING state) it gets the IPI, but the signal is not handled on return from the interrupt. It would be handled on return to userspace in the future when the fake signal is sent again. If the task was sleeping in a syscall it would be woken by our fake signal, it would check if TIF_SIGPENDING is set (by calling the signal_pending() predicate) and return ERESTART* or EINTR. Syscalls with ERESTART* return values are restarted in case of the fake signal (see do_signal()).
EINTR is propagated back to the userspace program. This could disturb the program, but... * each process dealing with signals should react accordingly to EINTR return values. * syscalls returning EINTR happen to be quite a common situation in the system even if no fake signal is sent. * freezer sends the fake signal and does not deal with EINTR anyhow. Thus EINTR values are returned when the system is resumed. The actual marking of a task as safe is done in entry.S on the syscall and interrupt/exception exit paths. Note that the fake signal is not sent to stopped/traced tasks. Such a task prevents the patching from finishing until it continues again (is not traced anymore). Signed-off-by: Miroslav Benes <mbenes@suse.cz> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
-rw-r--r--kernel/kgraft.c22
-rw-r--r--kernel/signal.c3
2 files changed, 24 insertions, 1 deletions
diff --git a/kernel/kgraft.c b/kernel/kgraft.c
index 639ee05b0403e0..5d789245624aaa 100644
--- a/kernel/kgraft.c
+++ b/kernel/kgraft.c
@@ -268,6 +268,26 @@ static void kgr_finalize(void)
mutex_unlock(&kgr_in_progress_lock);
}
+static void kgr_send_fake_signal(void)
+{
+ struct task_struct *p, *t;
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(p, t) {
+ /*
+ * Send a fake signal to every non-kthread task which is still
+ * not migrated (kgr_task_in_progress() is true for it).
+ * kthreads are skipped here; they are expected to be migrated
+ * by other means.
+ *
+ * The whole walk runs under read_lock(&tasklist_lock), and each
+ * signal_wake_up() call is made with the task's sighand->siglock
+ * held with interrupts disabled.
+ *
+ * No signal is actually queued: per the commit description the
+ * task only gets TIF_SIGPENDING set and is woken up or kicked,
+ * so that it passes the kernel/userspace boundary where it is
+ * marked as safe.
+ *
+ * NOTE(review): the 0 passed to signal_wake_up() presumably
+ * means stopped/traced tasks are not woken, matching the note
+ * in the commit description -- confirm against signal_wake_up().
+ */
+ if ((t->flags & PF_KTHREAD) || !kgr_task_in_progress(t))
+ continue;
+
+ spin_lock_irq(&t->sighand->siglock);
+ signal_wake_up(t, 0);
+ spin_unlock_irq(&t->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+}
+
static void kgr_work_fn(struct work_struct *work)
{
static bool printed = false;
@@ -279,6 +299,8 @@ static void kgr_work_fn(struct work_struct *work)
KGR_TIMEOUT);
printed = true;
}
+ /* send fake signal */
+ kgr_send_fake_signal();
/* recheck again later */
queue_delayed_work(kgr_wq, &kgr_work, KGR_TIMEOUT * HZ);
return;
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f0876f9f6dd5d..6a5bc81562f8cc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -157,7 +157,8 @@ void recalc_sigpending_and_wake(struct task_struct *t)
void recalc_sigpending(void)
{
- if (!recalc_sigpending_tsk(current) && !freezing(current))
+ if (!recalc_sigpending_tsk(current) && !freezing(current) &&
+ !kgr_task_in_progress(current))
clear_thread_flag(TIF_SIGPENDING);
}