diff -urN linux-2.4.10/CREDITS linux/CREDITS --- linux-2.4.10/CREDITS Sat Oct 20 03:20:24 2001 +++ linux/CREDITS Sat Oct 20 03:21:15 2001 @@ -965,8 +965,8 @@ N: Nigel Gamble E: nigel@nrg.org -E: nigel@sgi.com D: Interrupt-driven printer driver +D: Preemptible kernel S: 120 Alley Way S: Mountain View, California 94040 S: USA diff -urN linux-2.4.10/Documentation/Configure.help linux/Documentation/Configure.help --- linux-2.4.10/Documentation/Configure.help Sat Oct 20 03:20:53 2001 +++ linux/Documentation/Configure.help Sat Oct 20 03:21:15 2001 @@ -132,6 +132,19 @@ If you have system with several CPU's, you do not need to say Y here: APIC will be used automatically. +Preemptible Kernel +CONFIG_PREEMPT + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load due to other, lower priority, processes. + + Say Y here if you are building a kernel for a desktop system, embedded + system or real-time system. Say N if you are building a kernel for a + system where throughput is more important than interactive response, + such as a server system. Say N if you are unsure. + Kernel math emulation CONFIG_MATH_EMULATION Linux can emulate a math coprocessor (used for floating point diff -urN linux-2.4.10/MAINTAINERS linux/MAINTAINERS --- linux-2.4.10/MAINTAINERS Sat Oct 20 03:20:25 2001 +++ linux/MAINTAINERS Sat Oct 20 03:21:15 2001 @@ -1151,6 +1151,13 @@ M: mostrows@styx.uwaterloo.ca S: Maintained +PREEMPTIBLE KERNEL +P: Robert M. Love +M: rml@tech9.net +L: linux-kernel@vger.kernel.org +W: http://tech9.net/rml/linux +S: Maintained + PROMISE DC4030 CACHING DISK CONTROLLER DRIVER P: Peter Denison M: promise@pnd-pc.demon.co.uk diff -urN linux-2.4.10/arch/i386/config.in linux/arch/i386/config.in --- linux-2.4.10/arch/i386/config.in Sat Oct 20 03:20:43 2001 +++ linux/arch/i386/config.in Sat Oct 20 03:21:15 2001 @@ -169,6 +169,7 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then @@ -177,8 +178,10 @@ fi fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi endmenu diff -urN linux-2.4.10/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- linux-2.4.10/arch/i386/kernel/entry.S Sat Oct 20 03:20:43 2001 +++ linux/arch/i386/kernel/entry.S Sat Oct 20 03:21:15 2001 @@ -72,7 +72,7 @@ * these are offsets into the task-struct. 
*/ state = 0 -flags = 4 +preempt_count = 4 sigpending = 8 addr_limit = 12 exec_domain = 16 @@ -80,8 +80,28 @@ tsk_ptrace = 24 processor = 52 + /* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT + +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 + ENOSYS = 38 +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl processor(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx) +#define CPU_INDX +#endif #define SAVE_ALL \ cld; \ @@ -248,12 +268,30 @@ ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) +#ifdef CONFIG_PREEMPT + cli + decl preempt_count(%ebx) +#endif ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? jne ret_from_sys_call +#ifdef CONFIG_PREEMPT + cmpl $0,preempt_count(%ebx) + jnz restore_all + cmpl $0,need_resched(%ebx) + jz restore_all + movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl preempt_count(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#else jmp restore_all +#endif ALIGN reschedule: @@ -290,6 +328,9 @@ GET_CURRENT(%ebx) call *%edi addl $8,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(coprocessor_error) @@ -309,12 +350,18 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate +#ifdef CONFIG_PREEMPT + cli +#endif call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(debug) diff -urN linux-2.4.10/arch/i386/kernel/i387.c linux/arch/i386/kernel/i387.c --- linux-2.4.10/arch/i386/kernel/i387.c Sat Oct 20 03:20:43 2001 +++ linux/arch/i386/kernel/i387.c Sat Oct 20 03:21:15 2001 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -65,6 +66,8 @@ { struct task_struct *tsk = current; + preempt_disable(); + if (tsk->flags & PF_USEDFPU) { __save_init_fpu(tsk); return; diff -urN linux-2.4.10/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- linux-2.4.10/arch/i386/kernel/traps.c Sat Oct 20 03:20:43 2001 +++ linux/arch/i386/kernel/traps.c Sat Oct 20 03:21:15 2001 @@ -697,6 +697,11 @@ */ asmlinkage void math_state_restore(struct pt_regs regs) { + /* + * CONFIG_PREEMPT + * Must be called with preemption disabled + */ + __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ if (current->used_math) { diff -urN linux-2.4.10/arch/i386/lib/dec_and_lock.c linux/arch/i386/lib/dec_and_lock.c --- linux-2.4.10/arch/i386/lib/dec_and_lock.c Sat Oct 20 03:20:43 2001 +++ linux/arch/i386/lib/dec_and_lock.c Sat Oct 20 03:21:15 2001 @@ -8,6 +8,7 @@ */ #include +#include #include int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) diff -urN linux-2.4.10/drivers/ieee1394/csr.c linux/drivers/ieee1394/csr.c --- linux-2.4.10/drivers/ieee1394/csr.c Sat Oct 20 03:20:39 2001 +++ linux/drivers/ieee1394/csr.c Sat Oct 20 03:21:15 2001 @@ -10,6 +10,7 @@ */ #include +#include #include "ieee1394_types.h" #include "hosts.h" diff -urN linux-2.4.10/fs/adfs/map.c linux/fs/adfs/map.c --- 
linux-2.4.10/fs/adfs/map.c Sat Oct 20 03:20:24 2001 +++ linux/fs/adfs/map.c Sat Oct 20 03:21:15 2001 @@ -12,6 +12,7 @@ #include #include #include +#include #include "adfs.h" diff -urN linux-2.4.10/fs/exec.c linux/fs/exec.c --- linux-2.4.10/fs/exec.c Sat Oct 20 03:20:24 2001 +++ linux/fs/exec.c Sat Oct 20 03:21:15 2001 @@ -419,8 +419,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current); mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -urN linux-2.4.10/fs/fat/cache.c linux/fs/fat/cache.c --- linux-2.4.10/fs/fat/cache.c Sat Oct 20 03:20:24 2001 +++ linux/fs/fat/cache.c Sat Oct 20 03:21:15 2001 @@ -14,6 +14,7 @@ #include #include #include +#include #include "msbuffer.h" diff -urN linux-2.4.10/include/asm-i386/hardirq.h linux/include/asm-i386/hardirq.h --- linux-2.4.10/include/asm-i386/hardirq.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/hardirq.h Sat Oct 20 03:21:15 2001 @@ -36,6 +36,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -urN linux-2.4.10/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- linux-2.4.10/include/asm-i386/highmem.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/highmem.h Sat Oct 20 03:21:15 2001 @@ -85,6 +85,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -106,8 +107,10 @@ unsigned long vaddr = (unsigned long) kvaddr; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) BUG(); @@ -119,6 +122,7 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.4.10/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h --- linux-2.4.10/include/asm-i386/hw_irq.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/hw_irq.h Sat Oct 20 03:21:15 2001 @@ -95,6 +95,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_CONTEX_SWITCH_LOCK \ + GET_CURRENT \ + "incl 4(%ebx)\n\t" +#else +#define BUMP_CONTEX_SWITCH_LOCK +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -108,15 +120,12 @@ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_CONTEX_SWITCH_LOCK #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" - /* * SMP has a few special interrupts for IPI messages */ diff -urN linux-2.4.10/include/asm-i386/i387.h linux/include/asm-i386/i387.h --- linux-2.4.10/include/asm-i386/i387.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/i387.h Sat Oct 20 03:21:15 2001 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -urN linux-2.4.10/include/asm-i386/mmu_context.h linux/include/asm-i386/mmu_context.h --- linux-2.4.10/include/asm-i386/mmu_context.h Sat Oct 20 03:20:24 2001 +++ 
linux/include/asm-i386/mmu_context.h Sat Oct 20 03:21:15 2001 @@ -27,6 +27,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() == 0) + BUG(); +#endif if (prev != next) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); diff -urN linux-2.4.10/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- linux-2.4.10/include/asm-i386/pgalloc.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/pgalloc.h Sat Oct 20 03:21:15 2001 @@ -65,20 +65,26 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static __inline__ void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static __inline__ void free_pgd_slow(pgd_t *pgd) @@ -108,19 +114,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long *)pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = ret[1]; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static __inline__ void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } static __inline__ void pte_free_slow(pte_t *pte) diff -urN linux-2.4.10/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- linux-2.4.10/include/asm-i386/processor.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/processor.h Sat Oct 20 03:21:15 2001 @@ -500,7 +500,10 @@ { __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); } -#define spin_lock_prefetch(x) prefetchw(x) +#define spin_lock_prefetch(x) do { \ + prefetchw(x); \ + preempt_prefetch(¤t->preempt_count.counter); \ +} while(0) #endif diff -urN linux-2.4.10/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h --- linux-2.4.10/include/asm-i386/smplock.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/smplock.h Sat Oct 20 03:21:15 2001 @@ -10,7 +10,15 @@ extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_is_disable() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -42,6 +50,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -54,6 +67,7 @@ :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); #endif +#endif } static __inline__ void unlock_kernel(void) diff -urN linux-2.4.10/include/asm-i386/softirq.h linux/include/asm-i386/softirq.h --- linux-2.4.10/include/asm-i386/softirq.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/softirq.h Sat Oct 20 03:21:15 2001 @@ -5,9 +5,9 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } 
while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) @@ -22,7 +22,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. */ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -45,4 +45,6 @@ /* no registers clobbered */ ); \ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.10/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- linux-2.4.10/include/asm-i386/spinlock.h Sat Oct 20 03:20:24 2001 +++ linux/include/asm-i386/spinlock.h Sat Oct 20 03:21:15 2001 @@ -70,7 +70,7 @@ #define spin_unlock_string \ "movb $1,%0" -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -80,7 +80,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -95,7 +95,7 @@ :"=m" (lock->lock) : : "memory"); } -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -148,7 +148,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -157,7 +157,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -166,10 +166,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN linux-2.4.10/include/linux/brlock.h linux/include/linux/brlock.h --- linux-2.4.10/include/linux/brlock.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/brlock.h Sat Oct 20 03:21:15 2001 @@ -171,11 +171,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) -#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -urN linux-2.4.10/include/linux/dcache.h linux/include/linux/dcache.h --- linux-2.4.10/include/linux/dcache.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/dcache.h Sat Oct 20 03:21:15 2001 @@ -126,31 +126,6 @@ extern spinlock_t dcache_lock; -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * 
d_drop() unhashes the entry from the parent - * dentry hashes, so that it won't be found through - * a VFS lookup any more. Note that this is different - * from deleting the dentry - d_delete will try to - * mark the dentry negative if possible, giving a - * successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants - * to invalidate a dentry for some reason (NFS - * timeouts or autofs deletes). - */ - -static __inline__ void d_drop(struct dentry * dentry) -{ - spin_lock(&dcache_lock); - list_del(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_hash); - spin_unlock(&dcache_lock); -} - static __inline__ int dname_external(struct dentry *d) { return d->d_name.name != d->d_iname; @@ -272,3 +247,34 @@ #endif /* __KERNEL__ */ #endif /* __LINUX_DCACHE_H */ + +#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define __LINUX_DCACHE_H_INLINES + +#ifdef __KERNEL__ +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent + * dentry hashes, so that it won't be found through + * a VFS lookup any more. Note that this is different + * from deleting the dentry - d_delete will try to + * mark the dentry negative if possible, giving a + * successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants + * to invalidate a dentry for some reason (NFS + * timeouts or autofs deletes). + */ + +static __inline__ void d_drop(struct dentry * dentry) +{ + spin_lock(&dcache_lock); + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); + spin_unlock(&dcache_lock); +} +#endif +#endif diff -urN linux-2.4.10/include/linux/fs_struct.h linux/include/linux/fs_struct.h --- linux-2.4.10/include/linux/fs_struct.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/fs_struct.h Sat Oct 20 03:21:15 2001 @@ -20,6 +20,15 @@ extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); +struct fs_struct *copy_fs_struct(struct fs_struct *old); +void put_fs_struct(struct fs_struct *fs); + +#endif +#endif + +#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_FS_STRUCT_H_INLINES +#ifdef __KERNEL__ /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. 
@@ -65,9 +74,5 @@ mntput(old_pwdmnt); } } - -struct fs_struct *copy_fs_struct(struct fs_struct *old); -void put_fs_struct(struct fs_struct *fs); - #endif #endif diff -urN linux-2.4.10/include/linux/sched.h linux/include/linux/sched.h --- linux-2.4.10/include/linux/sched.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/sched.h Sat Oct 20 03:21:15 2001 @@ -88,6 +88,7 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x40000000 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -154,6 +155,9 @@ #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); +#endif extern int schedule_task(struct tq_struct *task); extern void flush_scheduled_tasks(void); @@ -283,7 +287,17 @@ * offsets of these are hardcoded elsewhere - touch with care */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +#ifdef CONFIG_PREEMPT + /* + * We want the preempt_count in this cache line, but we + * a) don't want to mess up the offsets in asm code, and + * b) the alignment of the next line below, + * so we move "flags" down + */ + atomic_t preempt_count; /* 0=> preemptable, < 0 => BUG */ +#else unsigned long flags; /* per process flags, defined below */ +#endif int sigpending; mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -317,6 +331,10 @@ struct mm_struct *active_mm; struct list_head local_pages; unsigned int allocation_order, nr_local_pages; +#ifdef CONFIG_PREEMPT + unsigned long flags; /* per process flags, defined below */ +#endif + /* task state */ struct linux_binfmt *binfmt; @@ -899,6 +917,11 @@ return res; } +#define _TASK_STRUCT_DEFINED +#include +#include +#include + #endif /* __KERNEL__ */ #endif diff -urN linux-2.4.10/include/linux/smp.h linux/include/linux/smp.h --- linux-2.4.10/include/linux/smp.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/smp.h Sat Oct 20 03:21:15 2001 @@ -81,7 +81,9 @@ #define smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_threads_ready 1 +#ifndef CONFIG_PREEMPT #define kernel_lock() +#endif #define cpu_logical_map(cpu) 0 #define cpu_number_map(cpu) 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) diff -urN linux-2.4.10/include/linux/smp_lock.h linux/include/linux/smp_lock.h --- linux-2.4.10/include/linux/smp_lock.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/smp_lock.h Sat Oct 20 03:21:15 2001 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -urN linux-2.4.10/include/linux/spinlock.h linux/include/linux/spinlock.h --- linux-2.4.10/include/linux/spinlock.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/spinlock.h Sat Oct 20 03:21:15 2001 @@ -41,7 +41,9 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -57,11 +59,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". 
*/ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -120,12 +122,72 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_read_unlock(lock) do { } while(0) +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ + +#ifdef CONFIG_PREEMPT + +#define switch_lock_count() current->preempt_count +#define preempt_is_disable() (switch_lock_count().counter) +#define atomic_ptr_preempt_count() (&switch_lock_count()) +#define preempt_prefetch(a) prefetchw(a) + +#define preempt_disable() do { \ + atomic_inc(atomic_ptr_preempt_count()); \ +} while (0) + +#define preempt_enable_no_resched() do { \ + atomic_dec(atomic_ptr_preempt_count()); \ +} while (0) + +#define preempt_enable() do { \ + if (atomic_dec_and_test(atomic_ptr_preempt_count()) && \ + current->need_resched) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#define spin_unlock(lock) do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable(); _raw_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#else /* CONFIG_PREEMPT */ + +#define preempt_is_disable() do { } while (0) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() +#define preempt_enable() do { } while (0) +#define preempt_prefetch(a) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif /* !CONFIG_PREEMPT */ /* "lock on reference count zero" */ #ifndef atomic_dec_and_lock diff -urN linux-2.4.10/include/linux/tqueue.h linux/include/linux/tqueue.h --- linux-2.4.10/include/linux/tqueue.h Sat Oct 20 03:20:24 2001 +++ linux/include/linux/tqueue.h Sat Oct 20 03:21:15 2001 @@ -94,6 +94,22 @@ extern spinlock_t tqueue_lock; /* + * Call all "bottom halfs" on a given list. + */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ + +#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_TQUEUE_H_INLINES +/* * Queue a task on a tq. Return non-zero if it was successfully * added. 
*/ @@ -109,17 +125,4 @@ } return ret; } - -/* - * Call all "bottom halfs" on a given list. - */ - -extern void __run_task_queue(task_queue *list); - -static inline void run_task_queue(task_queue *list) -{ - if (TQ_ACTIVE(*list)) - __run_task_queue(list); -} - -#endif /* _LINUX_TQUEUE_H */ +#endif diff -urN linux-2.4.10/kernel/exit.c linux/kernel/exit.c --- linux-2.4.10/kernel/exit.c Sat Oct 20 03:20:24 2001 +++ linux/kernel/exit.c Sat Oct 20 03:21:15 2001 @@ -279,6 +279,10 @@ struct mm_struct * start_lazy_tlb(void) { struct mm_struct *mm = current->mm; +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() == 0) + BUG(); +#endif current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); @@ -290,6 +294,10 @@ { struct mm_struct *active_mm = current->active_mm; +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() == 0) + BUG(); +#endif current->mm = mm; if (mm != active_mm) { current->active_mm = mm; @@ -313,8 +321,8 @@ /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); + task_unlock(tsk); mmput(mm); } } diff -urN linux-2.4.10/kernel/fork.c linux/kernel/fork.c --- linux-2.4.10/kernel/fork.c Sat Oct 20 03:20:24 2001 +++ linux/kernel/fork.c Sat Oct 20 03:21:15 2001 @@ -604,6 +604,12 @@ if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); +#ifdef CONFIG_PREEMPT + /* Since we are keeping the context switch off state as part + * of the context, make sure we start with it off. + */ + p->preempt_count.counter = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; diff -urN linux-2.4.10/kernel/ksyms.c linux/kernel/ksyms.c --- linux-2.4.10/kernel/ksyms.c Sat Oct 20 03:20:24 2001 +++ linux/kernel/ksyms.c Sat Oct 20 03:21:15 2001 @@ -431,6 +431,9 @@ EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); diff -urN linux-2.4.10/kernel/sched.c linux/kernel/sched.c --- linux-2.4.10/kernel/sched.c Sat Oct 20 03:20:24 2001 +++ linux/kernel/sched.c Sat Oct 20 03:21:15 2001 @@ -475,7 +475,7 @@ task_lock(prev); prev->has_cpu = 0; mb(); - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto needs_resched; out_unlock: @@ -505,7 +505,7 @@ goto out_unlock; spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !prev->has_cpu) + if (task_on_runqueue(prev) && !prev->has_cpu) reschedule_idle(prev); spin_unlock_irqrestore(&runqueue_lock, flags); goto out_unlock; @@ -518,6 +518,9 @@ void schedule_tail(struct task_struct *prev) { __schedule_tail(prev); +#ifdef CONFIG_PREEMPT + preempt_enable(); +#endif } /* @@ -540,6 +543,10 @@ spin_lock_prefetch(&runqueue_lock); +#ifdef CONFIG_PREEMPT + preempt_disable(); +#endif + if (!current->active_mm) BUG(); need_resched_back: prev = current; @@ -563,6 +570,9 @@ goto move_rr_last; move_rr_back: +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() & PREEMPT_ACTIVE) goto treat_like_run; +#endif switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { @@ -573,6 +583,9 @@ del_from_runqueue(prev); case TASK_RUNNING:; } +#ifdef CONFIG_PREEMPT + treat_like_run: +#endif prev->need_resched = 0; /* @@ -585,7 +598,7 @@ */ next = idle_task(this_cpu); c = -1000; - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto still_running; still_running_back: @@ -679,6 +692,9 @@ if 
(current->need_resched) goto need_resched_back; +#ifdef CONFIG_PREEMPT + preempt_enable_no_resched(); +#endif return; recalculate: @@ -1076,6 +1092,32 @@ return 0; } +#ifdef CONFIG_PREEMPT + +#ifdef CONFIG_SMP +#define lock_to_this_cpu() \ + int old_cpus_allowed = current->cpus_allowed; \ + current->cpus_allowed = 1 << smp_processor_id() +#define restore_cpus_allowed() current->cpus_allowed = old_cpus_allowed; +#else +#define lock_to_this_cpu() +#define restore_cpus_allowed() +#endif /* !CONFIG_SMP */ + +asmlinkage void preempt_schedule(void) +{ + while (current->need_resched) { + /* it would be ideal not to lock tasks to their cpu here, + * but only around the data that needs such locking */ + lock_to_this_cpu(); + atomic_add(PREEMPT_ACTIVE + 1, &switch_lock_count()); + schedule(); + restore_cpus_allowed(); + atomic_sub(PREEMPT_ACTIVE + 1, &switch_lock_count()); + } +} +#endif /* CONFIG_PREEMPT */ + asmlinkage long sys_sched_get_priority_max(int policy) { int ret = -EINVAL; diff -urN linux-2.4.10/lib/dec_and_lock.c linux/lib/dec_and_lock.c --- linux-2.4.10/lib/dec_and_lock.c Sat Oct 20 03:20:24 2001 +++ linux/lib/dec_and_lock.c Sat Oct 20 03:21:15 2001 @@ -1,5 +1,6 @@ #include #include +#include #include /* diff -urN linux-2.4.10/net/socket.c linux/net/socket.c --- linux-2.4.10/net/socket.c Sat Oct 20 03:20:25 2001 +++ linux/net/socket.c Sat Oct 20 03:21:15 2001 @@ -135,7 +135,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
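
The core idea the patch implements, visible in the spin_lock()/preempt_enable() wrappers and the entry.S return path above, is a per-task preempt_count: preemption is legal only while the count is zero, disable/enable calls nest, and the enable that drops the count back to zero reschedules if need_resched was set in the meantime. The stand-alone C sketch below (illustrative only, not part of the patch; the model_* names are invented here) mimics that counting discipline in user space:

#include <stdio.h>

/* User-space model of the patch's preemption discipline (illustrative).
 * preempt_count mirrors current->preempt_count and need_resched mirrors
 * current->need_resched; both are plain ints since there is no real
 * interrupt context here. */
static int preempt_count;
static int need_resched;

static void model_schedule(void)
{
	printf("preempt_schedule(): switching tasks\n");
	need_resched = 0;
}

static void model_preempt_disable(void)
{
	preempt_count++;		/* what spin_lock()/cpu_bh_disable() add */
}

static void model_preempt_enable(void)
{
	/* what spin_unlock()/local_bh_enable() add: only the outermost
	 * enable may reschedule, and only if a reschedule is pending */
	if (--preempt_count == 0 && need_resched)
		model_schedule();
}

int main(void)
{
	model_preempt_disable();	/* outer critical section */
	model_preempt_disable();	/* nested critical section */
	need_resched = 1;		/* an "interrupt" marks us for preemption */
	model_preempt_enable();		/* count 2 -> 1: no schedule yet */
	model_preempt_enable();		/* count 1 -> 0: schedule runs now */
	return 0;
}

In the patch itself the decrement-and-test is atomic_dec_and_test() on current->preempt_count, and the ret_from_intr path in entry.S performs the same count/need_resched check before returning, so a preemption point exists at every unlock and at every interrupt exit.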