diff -urNp rcu-poll-ref/include/linux/rcupdate.h rcu-poll/include/linux/rcupdate.h --- rcu-poll-ref/include/linux/rcupdate.h Thu Jan 1 01:00:00 1970 +++ rcu-poll/include/linux/rcupdate.h Wed May 29 20:29:24 2002 @@ -0,0 +1,59 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) International Business Machines Corp., 2001 + * + * Author: Dipankar Sarma + * + * Based on the original work by Paul McKenney + * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * http://lse.sourceforge.net/locking/rcupdate.html + * + */ + +#ifndef __LINUX_RCUPDATE_H +#define __LINUX_RCUPDATE_H + +#include + +/* + * Callback structure for use with call_rcu(). + */ +struct rcu_head { + struct list_head list; + void (*func)(void *obj); + void *arg; +}; + +#define RCU_HEAD_INIT(head) { LIST_HEAD_INIT(head.list), NULL, NULL } +#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head) +#define INIT_RCU_HEAD(ptr) do { \ + INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \ +} while (0) + + +extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)); +extern void synchronize_kernel(void); + +extern void rcu_init(void); + +#endif /* __LINUX_RCUPDATE_H */ diff -urNp rcu-poll-ref/include/linux/sched.h rcu-poll/include/linux/sched.h --- rcu-poll-ref/include/linux/sched.h Wed May 29 20:29:11 2002 +++ rcu-poll/include/linux/sched.h Wed May 29 20:29:24 2002 @@ -163,39 +163,7 @@ extern int schedule_task(struct tq_struc extern void flush_scheduled_tasks(void); extern int start_context_thread(void); extern int current_is_keventd(void); - -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are - * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values - * are inverted: lower p->prio value means higher priority. - * - * The MAX_RT_USER_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - * - * Both values are configurable at compile-time. - */ - -#if CONFIG_MAX_USER_RT_PRIO < 100 -#define MAX_USER_RT_PRIO 100 -#elif CONFIG_MAX_USER_RT_PRIO > 1000 -#define MAX_USER_RT_PRIO 1000 -#else -#define MAX_USER_RT_PRIO CONFIG_MAX_USER_RT_PRIO -#endif - -#if CONFIG_MAX_RT_PRIO < 0 -#define MAX_RT_PRIO MAX_USER_RT_PRIO -#elif CONFIG_MAX_RT_PRIO > 200 -#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 200) -#else -#define MAX_RT_PRIO (MAX_USER_RT_PRIO + CONFIG_MAX_RT_PRIO) -#endif - -#define MAX_PRIO (MAX_RT_PRIO + 40) +extern void force_cpu_reschedule(int cpu); /* * The maximum RT priority is configurable. If the resulting diff -urNp rcu-poll-ref/include/linux/sched_runqueue.h rcu-poll/include/linux/sched_runqueue.h --- rcu-poll-ref/include/linux/sched_runqueue.h Thu Jan 1 01:00:00 1970 +++ rcu-poll/include/linux/sched_runqueue.h Wed May 29 20:29:27 2002 @@ -0,0 +1,80 @@ +#ifndef _LINUX_SCHED_RUNQUEUE_H +#define _LINUX_SCHED_RUNQUEUE_H + +#include + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are + * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values + * are inverted: lower p->prio value means higher priority. + * + * The MAX_RT_USER_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + * + * Both values are configurable at compile-time. + */ + +#if CONFIG_MAX_USER_RT_PRIO < 100 +#define MAX_USER_RT_PRIO 100 +#elif CONFIG_MAX_USER_RT_PRIO > 1000 +#define MAX_USER_RT_PRIO 1000 +#else +#define MAX_USER_RT_PRIO CONFIG_MAX_USER_RT_PRIO +#endif + +#if CONFIG_MAX_RT_PRIO < 0 +#define MAX_RT_PRIO MAX_USER_RT_PRIO +#elif CONFIG_MAX_RT_PRIO > 200 +#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 200) +#else +#define MAX_RT_PRIO (MAX_USER_RT_PRIO + CONFIG_MAX_RT_PRIO) +#endif + +#define MAX_PRIO (MAX_RT_PRIO + 40) + +/* + * These are the runqueue data structures: + */ + +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) + +struct prio_array { + int nr_active; + unsigned long bitmap[BITMAP_SIZE]; + list_t queue[MAX_PRIO]; +}; + +/* + * This is the main, per-CPU runqueue data structure. + * + * Locking rule: those places that want to lock multiple runqueues + * (such as the load balancing or the process migration code), lock + * acquire operations must be ordered by ascending &runqueue. + */ +struct runqueue { + spinlock_t lock; + spinlock_t frozen; + unsigned long nr_running, nr_switches, expired_timestamp; + long quiescent; /* RCU */ + task_t *curr, *idle; + prio_array_t *active, *expired, arrays[2]; + int prev_nr_running[NR_CPUS]; + task_t *migration_thread; + list_t migration_queue; +} ____cacheline_aligned; + +typedef struct runqueue runqueue_t; +extern runqueue_t runqueues[]; + +#define cpu_rq(cpu) (runqueues + (cpu)) +#define this_rq() cpu_rq(smp_processor_id()) +#define task_rq(p) cpu_rq((p)->cpu) +#define cpu_curr(cpu) (cpu_rq(cpu)->curr) +#define rt_task(p) ((p)->prio < MAX_RT_PRIO) +#define RCU_quiescent(cpu) (cpu_rq(cpu)->quiescent) + +#endif /* _LINUX_SCHED_RUNQUEUE_H */ diff -urNp rcu-poll-ref/init/main.c rcu-poll/init/main.c --- rcu-poll-ref/init/main.c Wed May 29 20:29:07 2002 +++ rcu-poll/init/main.c Wed May 29 20:29:24 2002 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -346,6 +347,7 @@ asmlinkage void __init start_kernel(void printk("Kernel command line: %s\n", saved_command_line); parse_options(command_line); trap_init(); + rcu_init(); init_IRQ(); sched_init(); softirq_init(); diff -urNp rcu-poll-ref/kernel/Makefile rcu-poll/kernel/Makefile --- rcu-poll-ref/kernel/Makefile Tue Jan 22 18:53:55 2002 +++ rcu-poll/kernel/Makefile Wed May 29 20:29:24 2002 @@ -9,12 +9,12 @@ O_TARGET := kernel.o -export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o +export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o rcupdate.o obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ sysctl.o acct.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o context.o + signal.o sys.o kmod.o context.o rcupdate.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o diff -urNp rcu-poll-ref/kernel/rcupdate.c rcu-poll/kernel/rcupdate.c --- rcu-poll-ref/kernel/rcupdate.c Thu Jan 1 01:00:00 1970 +++ rcu-poll/kernel/rcupdate.c Wed May 29 20:29:24 2002 @@ -0,0 +1,211 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (c) International Business Machines Corp., 2001 + * Copyright (C) Andrea Arcangeli SuSE, 2001 + * + * Author: Dipankar Sarma , + * Andrea Arcangeli + * + * Based on the original work by Paul McKenney + * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * http://lse.sourceforge.net/locking/rcupdate.html + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Definition for rcupdate control block. */ +static spinlock_t rcu_lock; +static struct list_head rcu_nxtlist; +static struct list_head rcu_curlist; +static struct tasklet_struct rcu_tasklet; +static unsigned long rcu_qsmask; +static int rcu_polling_in_progress; +static long rcu_quiescent_checkpoint[NR_CPUS]; + +/* + * Register a new rcu callback. This will be invoked as soon + * as all CPUs have performed a context switch or been seen in the + * idle loop or in a user process. + */ +void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg) +{ + head->func = func; + head->arg = arg; + + spin_lock_bh(&rcu_lock); + list_add(&head->list, &rcu_nxtlist); + spin_unlock_bh(&rcu_lock); + + tasklet_hi_schedule(&rcu_tasklet); +} + +static int rcu_prepare_polling(void) +{ + int stop; + int i; + +#ifdef DEBUG + if (!list_empty(&rcu_curlist)) + BUG(); +#endif + + stop = 1; + if (!list_empty(&rcu_nxtlist)) { + list_splice(&rcu_nxtlist, &rcu_curlist); + INIT_LIST_HEAD(&rcu_nxtlist); + + rcu_polling_in_progress = 1; + + for (i = 0; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); + + rcu_qsmask |= 1UL << cpu; + rcu_quiescent_checkpoint[cpu] = RCU_quiescent(cpu); + force_cpu_reschedule(cpu); + } + stop = 0; + } + + return stop; +} + +/* + * Invoke the completed RCU callbacks. + */ +static void rcu_invoke_callbacks(void) +{ + struct list_head *entry; + struct rcu_head *head; + +#ifdef DEBUG + if (list_empty(&rcu_curlist)) + BUG(); +#endif + + entry = rcu_curlist.prev; + do { + head = list_entry(entry, struct rcu_head, list); + entry = entry->prev; + + head->func(head->arg); + } while (entry != &rcu_curlist); + + INIT_LIST_HEAD(&rcu_curlist); +} + +static int rcu_completion(void) +{ + int stop; + + rcu_polling_in_progress = 0; + rcu_invoke_callbacks(); + + stop = rcu_prepare_polling(); + + return stop; +} + +static int rcu_polling(void) +{ + int i; + int stop; + + for (i = 0; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); + + if (rcu_qsmask & (1UL << cpu)) + if (rcu_quiescent_checkpoint[cpu] != RCU_quiescent(cpu)) + rcu_qsmask &= ~(1UL << cpu); + } + + stop = 0; + if (!rcu_qsmask) + stop = rcu_completion(); + + return stop; +} + +/* + * Look into the per-cpu callback information to see if there is + * any processing necessary - if so do it. + */ +static void rcu_process_callbacks(unsigned long data) +{ + int stop; + + spin_lock(&rcu_lock); + if (!rcu_polling_in_progress) + stop = rcu_prepare_polling(); + else + stop = rcu_polling(); + spin_unlock(&rcu_lock); + + if (!stop) + tasklet_hi_schedule(&rcu_tasklet); +} + +/* Because of FASTCALL declaration of complete, we use this wrapper */ +static void wakeme_after_rcu(void *completion) +{ + complete(completion); +} + +/* + * Initializes rcu mechanism. Assumed to be called early. + * That is before local timer(SMP) or jiffie timer (uniproc) is setup. + */ +void __init rcu_init(void) +{ + tasklet_init(&rcu_tasklet, rcu_process_callbacks, 0UL); + INIT_LIST_HEAD(&rcu_nxtlist); + INIT_LIST_HEAD(&rcu_curlist); + spin_lock_init(&rcu_lock); +} + +/* + * Wait until all the CPUs have gone through a "quiescent" state. + */ +void synchronize_kernel(void) +{ + struct rcu_head rcu; + DECLARE_COMPLETION(completion); + + /* Will wake me after RCU finished */ + call_rcu(&rcu, wakeme_after_rcu, &completion); + + /* Wait for it */ + wait_for_completion(&completion); +} + +EXPORT_SYMBOL(call_rcu); +EXPORT_SYMBOL(synchronize_kernel); diff -urNp rcu-poll-ref/kernel/sched.c rcu-poll/kernel/sched.c --- rcu-poll-ref/kernel/sched.c Wed May 29 20:29:11 2002 +++ rcu-poll/kernel/sched.c Wed May 29 20:29:24 2002 @@ -24,6 +24,7 @@ #include #include #include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -110,45 +111,7 @@ #define TASK_TIMESLICE(p) (MIN_TIMESLICE + \ ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39)) -/* - * These are the runqueue data structures: - */ - -#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) - -typedef struct runqueue runqueue_t; - -struct prio_array { - int nr_active; - unsigned long bitmap[BITMAP_SIZE]; - list_t queue[MAX_PRIO]; -}; - -/* - * This is the main, per-CPU runqueue data structure. - * - * Locking rule: those places that want to lock multiple runqueues - * (such as the load balancing or the process migration code), lock - * acquire operations must be ordered by ascending &runqueue. - */ -struct runqueue { - spinlock_t lock; - spinlock_t frozen; - unsigned long nr_running, nr_switches, expired_timestamp; - task_t *curr, *idle; - prio_array_t *active, *expired, arrays[2]; - int prev_nr_running[NR_CPUS]; - task_t *migration_thread; - list_t migration_queue; -} ____cacheline_aligned; - -static struct runqueue runqueues[NR_CPUS] __cacheline_aligned; - -#define cpu_rq(cpu) (runqueues + (cpu)) -#define this_rq() cpu_rq(smp_processor_id()) -#define task_rq(p) cpu_rq((p)->cpu) -#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -#define rt_task(p) ((p)->prio < MAX_RT_PRIO) +runqueue_t runqueues[NR_CPUS] __cacheline_aligned; static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) { @@ -737,6 +700,7 @@ need_resched: rq = this_rq(); release_kernel_lock(prev, smp_processor_id()); + rq->quiescent++; prev->sleep_timestamp = jiffies; spin_lock_irq(&rq->lock); @@ -810,6 +774,26 @@ switch_tasks: return; } +void force_cpu_reschedule(int cpu) +{ + struct runqueue *rq, *newrq; + struct task_struct *p; + unsigned long flags; + + for (;;) { + rq = cpu_rq(cpu); + p = rq->curr; + + newrq = task_rq_lock(p, &flags); + if (newrq == rq) + resched_task(p); + task_rq_unlock(newrq, &flags); + + if (newrq == rq) + break; + } +} + /* * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve