diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/rcupdate.h x/include/linux/rcupdate.h
--- x-ref/include/linux/rcupdate.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/linux/rcupdate.h	2003-06-07 12:33:51.000000000 +0200
@@ -0,0 +1,59 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) International Business Machines Corp., 2001
+ *
+ * Author: Dipankar Sarma
+ *
+ * Based on the original work by Paul McKenney
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPDATE_H
+#define __LINUX_RCUPDATE_H
+
+#include <linux/list.h>
+
+/*
+ * Callback structure for use with call_rcu().
+ */
+struct rcu_head {
+        struct list_head list;
+        void (*func)(void *obj);
+        void *arg;
+};
+
+#define RCU_HEAD_INIT(head) { LIST_HEAD_INIT(head.list), NULL, NULL }
+#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
+#define INIT_RCU_HEAD(ptr) do { \
+        INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
+} while (0)
+
+
+extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg));
+extern void synchronize_kernel(void);
+
+extern void rcu_init(void);
+
+#endif /* __LINUX_RCUPDATE_H */
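
[Editor's note, not part of the patch: a minimal sketch of how update-side code is expected to use the interface above. struct my_entry, my_free_rcu() and my_delete() are hypothetical names; only struct rcu_head, call_rcu(), list_del() and kfree() are real kernel interfaces. Writers still serialize among themselves (for example under a spinlock); call_rcu() only defers the free until every CPU has passed through a quiescent state.]

struct my_entry {
        struct list_head link;          /* linked into an RCU-protected list */
        int key;
        struct rcu_head rcu;            /* used to defer the kfree() */
};

static void my_free_rcu(void *arg)
{
        /* Runs only after every CPU has passed a quiescent state. */
        kfree(arg);
}

static void my_delete(struct my_entry *e)
{
        list_del(&e->link);             /* readers may still hold a reference to e */
        call_rcu(&e->rcu, my_free_rcu, e);
}
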
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/sched.h x/include/linux/sched.h
--- x-ref/include/linux/sched.h	2003-06-07 12:33:49.000000000 +0200
+++ x/include/linux/sched.h	2003-06-07 12:33:51.000000000 +0200
@@ -154,39 +154,7 @@ extern int schedule_task(struct tq_struc
 extern void flush_scheduled_tasks(void);
 extern int start_context_thread(void);
 extern int current_is_keventd(void);
-
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
- * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
- * are inverted: lower p->prio value means higher priority.
- *
- * The MAX_RT_USER_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space. This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- *
- * Both values are configurable at compile-time.
- */
-
-#if CONFIG_MAX_USER_RT_PRIO < 100
-#define MAX_USER_RT_PRIO 100
-#elif CONFIG_MAX_USER_RT_PRIO > 1000
-#define MAX_USER_RT_PRIO 1000
-#else
-#define MAX_USER_RT_PRIO CONFIG_MAX_USER_RT_PRIO
-#endif
-
-#if CONFIG_MAX_RT_PRIO < 0
-#define MAX_RT_PRIO MAX_USER_RT_PRIO
-#elif CONFIG_MAX_RT_PRIO > 200
-#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 200)
-#else
-#define MAX_RT_PRIO (MAX_USER_RT_PRIO + CONFIG_MAX_RT_PRIO)
-#endif
-
-#define MAX_PRIO (MAX_RT_PRIO + 40)
+extern void force_cpu_reschedule(int cpu);
 
 /*
  * The maximum RT priority is configurable. If the resulting
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/sched_runqueue.h x/include/linux/sched_runqueue.h
--- x-ref/include/linux/sched_runqueue.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/linux/sched_runqueue.h	2003-06-07 12:33:51.000000000 +0200
@@ -0,0 +1,80 @@
+#ifndef _LINUX_SCHED_RUNQUEUE_H
+#define _LINUX_SCHED_RUNQUEUE_H
+
+#include <linux/list.h>
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
+ * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
+ * are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_RT_USER_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space. This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ *
+ * Both values are configurable at compile-time.
+ */
+
+#if CONFIG_MAX_USER_RT_PRIO < 100
+#define MAX_USER_RT_PRIO 100
+#elif CONFIG_MAX_USER_RT_PRIO > 1000
+#define MAX_USER_RT_PRIO 1000
+#else
+#define MAX_USER_RT_PRIO CONFIG_MAX_USER_RT_PRIO
+#endif
+
+#if CONFIG_MAX_RT_PRIO < 0
+#define MAX_RT_PRIO MAX_USER_RT_PRIO
+#elif CONFIG_MAX_RT_PRIO > 200
+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 200)
+#else
+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + CONFIG_MAX_RT_PRIO)
+#endif
+
+#define MAX_PRIO (MAX_RT_PRIO + 40)
+
+/*
+ * These are the runqueue data structures:
+ */
+
+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
+
+struct prio_array {
+        int nr_active;
+        unsigned long bitmap[BITMAP_SIZE];
+        list_t queue[MAX_PRIO];
+};
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the process migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct runqueue {
+        spinlock_t lock;
+        unsigned long nr_running, nr_switches, expired_timestamp;
+        long quiescent;                 /* RCU */
+        task_t *curr, *idle;
+        prio_array_t *active, *expired, arrays[2];
+        long nr_uninterruptible;
+        int prev_nr_running[NR_CPUS];
+        task_t *migration_thread;
+        list_t migration_queue;
+} ____cacheline_aligned;
+
+typedef struct runqueue runqueue_t;
+extern runqueue_t runqueues[];
+
+#define cpu_rq(cpu)             (runqueues + (cpu))
+#define this_rq()               cpu_rq(smp_processor_id())
+#define task_rq(p)              cpu_rq((p)->cpu)
+#define cpu_curr(cpu)           (cpu_rq(cpu)->curr)
+#define rt_task(p)              ((p)->prio < MAX_RT_PRIO)
+#define RCU_quiescent(cpu)      (cpu_rq(cpu)->quiescent)
+
+#endif /* _LINUX_SCHED_RUNQUEUE_H */
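
[Editor's note, not part of the patch: a sketch of how the prio_array above is consumed. Task selection is O(1): find the first set bit in the priority bitmap, then take the head of that priority's queue. pick_next() is a hypothetical helper written against the structures declared above; schedule() in kernel/sched.c does the equivalent inline with sched_find_first_bit(), and run_list is assumed to be the task_struct list member used by the O(1) scheduler.]

static task_t *pick_next(prio_array_t *array)
{
        int idx;
        list_t *queue;

        if (!array->nr_active)
                return NULL;                    /* nothing runnable in this array */

        idx = sched_find_first_bit(array->bitmap);      /* highest-priority non-empty queue */
        queue = array->queue + idx;
        return list_entry(queue->next, task_t, run_list);
}
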
diff -urNp --exclude CVS --exclude BitKeeper x-ref/init/main.c x/init/main.c
--- x-ref/init/main.c	2003-06-07 12:33:50.000000000 +0200
+++ x/init/main.c	2003-06-07 12:33:51.000000000 +0200
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <linux/rcupdate.h>
 #include
 #include
 
@@ -347,6 +348,7 @@ asmlinkage void __init start_kernel(void
         printk("Kernel command line: %s\n", saved_command_line);
         parse_options(command_line);
         trap_init();
+        rcu_init();
         init_IRQ();
         sched_init();
         softirq_init();
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/Makefile x/kernel/Makefile
--- x-ref/kernel/Makefile	2003-06-07 12:33:40.000000000 +0200
+++ x/kernel/Makefile	2003-06-07 12:34:12.000000000 +0200
@@ -9,12 +9,12 @@
 
 O_TARGET := kernel.o
 
-export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o cpufreq.o
+export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o cpufreq.o rcupdate.o
 
 obj-y     = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
             module.o exit.o itimer.o info.o time.o softirq.o resource.o \
             sysctl.o acct.o capability.o ptrace.o timer.o user.o \
-            signal.o sys.o kmod.o context.o
+            signal.o sys.o kmod.o context.o rcupdate.o
 
 obj-$(CONFIG_UID16)     += uid16.o
 obj-$(CONFIG_MODULES)   += ksyms.o
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/rcupdate.c x/kernel/rcupdate.c
--- x-ref/kernel/rcupdate.c	1970-01-01 01:00:00.000000000 +0100
+++ x/kernel/rcupdate.c	2003-06-07 12:33:51.000000000 +0200
@@ -0,0 +1,211 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) International Business Machines Corp., 2001
+ * Copyright (C) Andrea Arcangeli SuSE, 2001
+ *
+ * Author: Dipankar Sarma ,
+ *         Andrea Arcangeli
+ *
+ * Based on the original work by Paul McKenney
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/sched_runqueue.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/rcupdate.h>
+
+/* Definition for rcupdate control block. */
+static spinlock_t rcu_lock;
+static struct list_head rcu_nxtlist;
+static struct list_head rcu_curlist;
+static struct tasklet_struct rcu_tasklet;
+static unsigned long rcu_qsmask;
+static int rcu_polling_in_progress;
+static long rcu_quiescent_checkpoint[NR_CPUS];
+
+/*
+ * Register a new rcu callback. This will be invoked as soon
+ * as all CPUs have performed a context switch or been seen in the
+ * idle loop or in a user process.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+        head->func = func;
+        head->arg = arg;
+
+        spin_lock_bh(&rcu_lock);
+        list_add(&head->list, &rcu_nxtlist);
+        spin_unlock_bh(&rcu_lock);
+
+        tasklet_hi_schedule(&rcu_tasklet);
+}
+
+static int rcu_prepare_polling(void)
+{
+        int stop;
+        int i;
+
+#ifdef DEBUG
+        if (!list_empty(&rcu_curlist))
+                BUG();
+#endif
+
+        stop = 1;
+        if (!list_empty(&rcu_nxtlist)) {
+                list_splice(&rcu_nxtlist, &rcu_curlist);
+                INIT_LIST_HEAD(&rcu_nxtlist);
+
+                rcu_polling_in_progress = 1;
+
+                for (i = 0; i < smp_num_cpus; i++) {
+                        int cpu = cpu_logical_map(i);
+
+                        rcu_qsmask |= 1UL << cpu;
+                        rcu_quiescent_checkpoint[cpu] = RCU_quiescent(cpu);
+                        force_cpu_reschedule(cpu);
+                }
+                stop = 0;
+        }
+
+        return stop;
+}
+
+/*
+ * Invoke the completed RCU callbacks.
+ */
+static void rcu_invoke_callbacks(void)
+{
+        struct list_head *entry;
+        struct rcu_head *head;
+
+#ifdef DEBUG
+        if (list_empty(&rcu_curlist))
+                BUG();
+#endif
+
+        entry = rcu_curlist.prev;
+        do {
+                head = list_entry(entry, struct rcu_head, list);
+                entry = entry->prev;
+
+                head->func(head->arg);
+        } while (entry != &rcu_curlist);
+
+        INIT_LIST_HEAD(&rcu_curlist);
+}
+
+static int rcu_completion(void)
+{
+        int stop;
+
+        rcu_polling_in_progress = 0;
+        rcu_invoke_callbacks();
+
+        stop = rcu_prepare_polling();
+
+        return stop;
+}
+
+static int rcu_polling(void)
+{
+        int i;
+        int stop;
+
+        for (i = 0; i < smp_num_cpus; i++) {
+                int cpu = cpu_logical_map(i);
+
+                if (rcu_qsmask & (1UL << cpu))
+                        if (rcu_quiescent_checkpoint[cpu] != RCU_quiescent(cpu))
+                                rcu_qsmask &= ~(1UL << cpu);
+        }
+
+        stop = 0;
+        if (!rcu_qsmask)
+                stop = rcu_completion();
+
+        return stop;
+}
+
+/*
+ * Look into the per-cpu callback information to see if there is
+ * any processing necessary - if so do it.
+ */
+static void rcu_process_callbacks(unsigned long data)
+{
+        int stop;
+
+        spin_lock(&rcu_lock);
+        if (!rcu_polling_in_progress)
+                stop = rcu_prepare_polling();
+        else
+                stop = rcu_polling();
+        spin_unlock(&rcu_lock);
+
+        if (!stop)
+                tasklet_hi_schedule(&rcu_tasklet);
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+        complete(completion);
+}
+
+/*
+ * Initializes rcu mechanism. Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ */
+void __init rcu_init(void)
+{
+        tasklet_init(&rcu_tasklet, rcu_process_callbacks, 0UL);
+        INIT_LIST_HEAD(&rcu_nxtlist);
+        INIT_LIST_HEAD(&rcu_curlist);
+        spin_lock_init(&rcu_lock);
+}
+
+/*
+ * Wait until all the CPUs have gone through a "quiescent" state.
+ */
+void synchronize_kernel(void)
+{
+        struct rcu_head rcu;
+        DECLARE_COMPLETION(completion);
+
+        /* Will wake me after RCU finished */
+        call_rcu(&rcu, wakeme_after_rcu, &completion);
+
+        /* Wait for it */
+        wait_for_completion(&completion);
+}
+
+EXPORT_SYMBOL(call_rcu);
+EXPORT_SYMBOL(synchronize_kernel);
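
[Editor's note, not part of the patch: a sketch of the blocking interface implemented above. A writer that must not proceed until every CPU has passed a quiescent state can call synchronize_kernel() instead of registering a callback; since it sleeps on a completion, it may only be used from process context. my_unregister(), my_handler_table and struct my_handler are hypothetical names; kfree() is the usual slab interface.]

static struct my_handler *my_handler_table[16];

void my_unregister(int idx)
{
        struct my_handler *h = my_handler_table[idx];

        my_handler_table[idx] = NULL;   /* unpublish the pointer */
        synchronize_kernel();           /* wait for a full grace period */
        kfree(h);                       /* no CPU can still be using h */
}
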
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sched.c x/kernel/sched.c
--- x-ref/kernel/sched.c	2003-06-07 12:33:49.000000000 +0200
+++ x/kernel/sched.c	2003-06-07 12:33:51.000000000 +0200
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <linux/sched_runqueue.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
@@ -111,45 +112,7 @@
 #define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
         ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
 
-/*
- * These are the runqueue data structures:
- */
-
-#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
-
-typedef struct runqueue runqueue_t;
-
-struct prio_array {
-        int nr_active;
-        unsigned long bitmap[BITMAP_SIZE];
-        list_t queue[MAX_PRIO];
-};
-
-/*
- * This is the main, per-CPU runqueue data structure.
- *
- * Locking rule: those places that want to lock multiple runqueues
- * (such as the load balancing or the process migration code), lock
- * acquire operations must be ordered by ascending &runqueue.
- */
-struct runqueue {
-        spinlock_t lock;
-        unsigned long nr_running, nr_switches, expired_timestamp;
-        task_t *curr, *idle;
-        prio_array_t *active, *expired, arrays[2];
-        long nr_uninterruptible;
-        int prev_nr_running[NR_CPUS];
-        task_t *migration_thread;
-        list_t migration_queue;
-} ____cacheline_aligned;
-
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
-
-#define cpu_rq(cpu)             (runqueues + (cpu))
-#define this_rq()               cpu_rq(smp_processor_id())
-#define task_rq(p)              cpu_rq((p)->cpu)
-#define cpu_curr(cpu)           (cpu_rq(cpu)->curr)
-#define rt_task(p)              ((p)->prio < MAX_RT_PRIO)
+runqueue_t runqueues[NR_CPUS] __cacheline_aligned;
 
 /*
  * Default context-switch locking:
@@ -852,6 +815,7 @@ pick_next_task:
 
 switch_tasks:
         prefetch(next);
+        rq->quiescent++;
         clear_tsk_need_resched(prev);
 
         if (likely(prev != next)) {
@@ -871,6 +835,18 @@ switch_tasks:
         goto need_resched;
 }
 
+void force_cpu_reschedule(int cpu)
+{
+        struct runqueue *rq;
+        unsigned long flags;
+
+        rq = cpu_rq(cpu);
+
+        spin_lock_irqsave(&rq->lock, flags);
+        resched_task(rq->curr);
+        spin_unlock_irqrestore(&rq->lock, flags);
+}
+
 /*
  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve