From: Rusty Russell These two patches provide the framework for stopping kernel threads to allow hotplug CPU. This one just adds kthread.c and kthread.h, next one uses it. Most importantly, adds a Monty Python quote to the kernel. Details: The hotplug CPU code introduces two major problems: 1) Threads which previously never stopped (migration thread, ksoftirqd, keventd) have to be stopped cleanly as CPUs go offline. 2) Threads which previously never had to be created now have to be created when a CPU goes online. Unfortunately, stopping a thread is fairly baroque, involving memory barriers, a completion and spinning until the task is actually dead (for example, complete_and_exit() must be used if inside a module). There are also three problems in starting a thread: 1) Doing it from a random process context risks environment contamination: better to do it from keventd to guarantee a clean environment, a la call_usermodehelper. 2) Getting the task struct without races is hard: see kernel/sched.c migration_call(), kernel/workqueue.c create_workqueue_thread(). 3) There are races in starting a thread for a CPU which is not yet online: migration thread does a complex dance at the moment for a similar reason (there may be no migration thread to migrate us). Place all this logic in some primitives to make life easier: kthread_create() and kthread_stop(). These primitives require no extra data-structures in the caller: they operate on normal "struct task_struct"s. Other changes: - Expose keventd_up(), as keventd and migration threads will use kthread to launch, and kthread normally uses workqueues and must recognize this case. 
--- include/linux/kthread.h | 71 +++++++++++++++++++ include/linux/workqueue.h | 1 kernel/Makefile | 3 kernel/kthread.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/workqueue.c | 5 + 5 files changed, 249 insertions(+), 1 deletion(-) diff -puN /dev/null include/linux/kthread.h --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/include/linux/kthread.h 2004-01-07 23:28:36.000000000 -0800 @@ -0,0 +1,71 @@ +#ifndef _LINUX_KTHREAD_H +#define _LINUX_KTHREAD_H +/* Simple interface for creating and stopping kernel threads without mess. */ +#include +#include + +/** + * kthread_create: create a kthread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @namefmt: printf-style name for the thread. + * + * Description: This helper function creates and names a kernel + * thread. The thread will be stopped: use wake_up_process() to start + * it. See also kthread_run(), kthread_create_on_cpu(). + * + * When woken, the thread will run @threadfn() with @data as its + * argument. @threadfn can either call do_exit() directly if it is a + * standalone thread for which noone will call kthread_stop(), or + * return when 'signal_pending(current)' is true (which means + * kthread_stop() has been called). The return value should be zero + * or a negative error number: it will be passed to kthread_stop(). + * + * Returns a task_struct or ERR_PTR(-ENOMEM). + */ +struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, + const char namefmt[], ...); + +/** + * kthread_run: create and wake a thread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create() followed by + * wake_up_process(). Returns the kthread, or ERR_PTR(-ENOMEM). */ +#define kthread_run(threadfn, data, namefmt, ...) 
\ +({ \ + struct task_struct *__k \ + = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__k)) \ + wake_up_process(__k); \ + __k; \ +}) + +/** + * kthread_bind: bind a just-created kthread to a cpu. + * @k: thread created by kthread_create(). + * @cpu: cpu (might not be online, must be possible) for @k to run on. + * + * Description: This function is equivalent to set_cpus_allowed(), + * except that @cpu doesn't need to be online, and the thread must be + * stopped (ie. just returned from kthread_create(). + */ +void kthread_bind(struct task_struct *k, unsigned int cpu); + +/** + * kthread_stop: stop a thread created by kthread_create(). + * @k: thread created by kthread_create(). + * + * Sends a signal to @k, and waits for it to exit. Your threadfn() + * must not call do_exit() itself if you use this function! This can + * also be called after kthread_create() instead of calling + * wake_up_process(): the thread will exit without calling threadfn(). + * + * Returns the result of threadfn(), or -EINTR if wake_up_process() + * was never called. */ +int kthread_stop(struct task_struct *k); + +#endif /* _LINUX_KTHREAD_H */ diff -puN include/linux/workqueue.h~kthread-primitive include/linux/workqueue.h --- 25/include/linux/workqueue.h~kthread-primitive 2004-01-07 23:28:36.000000000 -0800 +++ 25-akpm/include/linux/workqueue.h 2004-01-07 23:28:36.000000000 -0800 @@ -60,6 +60,7 @@ extern int FASTCALL(schedule_work(struct extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); extern void flush_scheduled_work(void); extern int current_is_keventd(void); +extern int keventd_up(void); extern void init_workqueues(void); diff -puN /dev/null kernel/kthread.c --- /dev/null 2002-08-30 16:31:37.000000000 -0700 +++ 25-akpm/kernel/kthread.c 2004-01-07 23:28:36.000000000 -0800 @@ -0,0 +1,170 @@ +/* Kernel thread helper functions. + * Copyright (C) 2004 IBM Corporation, Rusty Russell. 
+ * + * Everything is done via keventd, so that we get a clean environment + * even if we're invoked from userspace (think modprobe, hotplug cpu, + * etc.). Also, it allows us to wait for dying kthreads without side + * effects involved in adopting kthreads to random processes. + */ +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include + +struct kthread_create_info +{ + /* Information passed to kthread() from keventd. */ + int (*threadfn)(void *data); + void *data; + struct completion started; + + /* Result passed back to kthread_create() from keventd. */ + struct task_struct *result; + struct completion done; +}; + +/* Returns so that WEXITSTATUS(ret) == errno. */ +static int kthread(void *_create) +{ + struct kthread_create_info *create = _create; + int (*threadfn)(void *data); + void *data; + int ret = -EINTR; + + /* Copy data: it's on keventd's stack */ + threadfn = create->threadfn; + data = create->data; + + /* OK, tell user we're spawned, wait for stop or wakeup */ + __set_current_state(TASK_INTERRUPTIBLE); + complete(&create->started); + schedule(); + + while (!signal_pending(current)) + ret = threadfn(data); + + return (-ret) << 8; +} + +/* We are keventd: create a thread. */ +static void keventd_create_kthread(void *_create) +{ + struct kthread_create_info *create = _create; + int pid; + + /* We want our own signal handler (we take no signals by default). */ + pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); + if (pid < 0) { + create->result = ERR_PTR(pid); + } else { + wait_for_completion(&create->started); + create->result = find_task_by_pid(pid); + wait_task_inactive(create->result); + } + complete(&create->done); +} + +struct kthread_stop_info +{ + struct task_struct *k; + int result; + struct completion done; +}; + +/* "to look upon me as her own dad -- in a very real, and legally + binding sense." 
- Michael Palin */ +static void adopt_kthread(struct task_struct *k) +{ + write_lock_irq(&tasklist_lock); + REMOVE_LINKS(k); + k->parent = current; + k->real_parent = current; + SET_LINKS(k); + write_unlock_irq(&tasklist_lock); +} + +/* We are keventd: stop the thread. */ +static void keventd_stop_kthread(void *_stop) +{ + struct kthread_stop_info *stop = _stop; + int status; + sigset_t blocked; + struct k_sigaction sa; + + /* Install a handler so SIGCHLD is actually delivered */ + sa.sa.sa_handler = SIG_DFL; + sa.sa.sa_flags = 0; + siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); + do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); + allow_signal(SIGCHLD); + + adopt_kthread(stop->k); + /* All signals are blocked, hence the force. */ + force_sig(SIGTERM, stop->k); + waitpid(stop->k->tgid, &status, __WALL); + stop->result = -((status >> 8) & 0xFF); + complete(&stop->done); + + /* Back to normal: block and flush all signals */ + sigfillset(&blocked); + sigprocmask(SIG_BLOCK, &blocked, NULL); + flush_signals(current); + sa.sa.sa_handler = SIG_IGN; + do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); + while (waitpid(-1, &status, __WALL|WNOHANG) > 0); +} + +struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, + const char namefmt[], + ...) +{ + struct kthread_create_info create; + DECLARE_WORK(work, keventd_create_kthread, &create); + + create.threadfn = threadfn; + create.data = data; + init_completion(&create.started); + init_completion(&create.done); + + /* If we're being called to start the first workqueue, we + * can't use keventd. 
*/ + if (!keventd_up()) + work.func(work.data); + else { + schedule_work(&work); + wait_for_completion(&create.done); + } + if (!IS_ERR(create.result)) { + va_list args; + va_start(args, namefmt); + vsnprintf(create.result->comm, sizeof(create.result->comm), + namefmt, args); + va_end(args); + } + + return create.result; +} + +void kthread_bind(struct task_struct *k, unsigned int cpu) +{ + BUG_ON(k->state != TASK_INTERRUPTIBLE); + k->thread_info->cpu = cpu; + k->cpus_allowed = cpumask_of_cpu(cpu); +} + +int kthread_stop(struct task_struct *k) +{ + struct kthread_stop_info stop; + DECLARE_WORK(work, keventd_stop_kthread, &stop); + + stop.k = k; + init_completion(&stop.done); + + schedule_work(&work); + wait_for_completion(&stop.done); + return stop.result; +} diff -puN kernel/Makefile~kthread-primitive kernel/Makefile --- 25/kernel/Makefile~kthread-primitive 2004-01-07 23:28:36.000000000 -0800 +++ 25-akpm/kernel/Makefile 2004-01-07 23:28:36.000000000 -0800 @@ -6,7 +6,8 @@ obj-y = sched.o fork.o exec_domain.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ - rcupdate.o intermodule.o extable.o params.o posix-timers.o + rcupdate.o intermodule.o extable.o params.o posix-timers.o \ + kthread.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o diff -puN kernel/workqueue.c~kthread-primitive kernel/workqueue.c --- 25/kernel/workqueue.c~kthread-primitive 2004-01-07 23:28:36.000000000 -0800 +++ 25-akpm/kernel/workqueue.c 2004-01-07 23:28:36.000000000 -0800 @@ -347,6 +347,11 @@ void flush_scheduled_work(void) flush_workqueue(keventd_wq); } +int keventd_up(void) +{ + return keventd_wq != NULL; +} + int current_is_keventd(void) { struct cpu_workqueue_struct *cwq; _