diff -urN linux-2.4.12/CREDITS linux/CREDITS
--- linux-2.4.12/CREDITS	Sat Oct 20 02:58:01 2001
+++ linux/CREDITS	Mon Nov 12 17:10:38 2001
@@ -971,8 +971,8 @@
 
 N: Nigel Gamble
 E: nigel@nrg.org
-E: nigel@sgi.com
 D: Interrupt-driven printer driver
+D: Preemptible kernel
 S: 120 Alley Way
 S: Mountain View, California 94040
 S: USA
diff -urN linux-2.4.12/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-2.4.12/Documentation/Configure.help	Sat Oct 20 02:58:01 2001
+++ linux/Documentation/Configure.help	Mon Nov 12 17:10:38 2001
@@ -153,6 +153,19 @@
   If you have a system with several CPUs, you do not need to say Y
   here: the local APIC will be used automatically.
 
+Preemptible Kernel
+CONFIG_PREEMPT
+  This option reduces the latency of the kernel when reacting to
+  real-time or interactive events by allowing a low priority process to
+  be preempted even if it is in kernel mode executing a system call.
+  This allows applications to run more reliably even when the system is
+  under load due to other, lower priority, processes.
+
+  Say Y here if you are building a kernel for a desktop system, embedded
+  system or real-time system.  Say N if you are building a kernel for a
+  system where throughput is more important than interactive response,
+  such as a server system.  Say N if you are unsure.
+
 Kernel math emulation
 CONFIG_MATH_EMULATION
   Linux can emulate a math coprocessor (used for floating point
diff -urN linux-2.4.12/Documentation/preempt-locking.txt linux/Documentation/preempt-locking.txt
--- linux-2.4.12/Documentation/preempt-locking.txt	Wed Dec 31 19:00:00 1969
+++ linux/Documentation/preempt-locking.txt	Mon Nov 12 17:10:38 2001
@@ -0,0 +1,94 @@
+		Proper Locking Under a Preemptible Kernel:
+		     Keeping Kernel Code Preempt-Safe
+				Robert Love
+			 Last Updated: 21 Oct 2001
+
+
+INTRODUCTION
+
+
+A preemptible kernel creates new locking issues.  The issues are the same as
+those under SMP: concurrency and reentrancy.  Thankfully, the Linux preemptible
+kernel model leverages existing SMP locking mechanisms.  Thus, the kernel
+requires explicit additional locking for very few additional situations.
+
+This document is for all kernel hackers.  Developing code in the kernel
+requires protecting these situations.  As you will see, these situations would
+normally require a lock, were they not per-CPU.
+
+
+RULE #1: Per-CPU data structures need explicit protection
+
+
+Two similar problems arise.  An example code snippet:
+
+	struct this_needs_locking tux[NR_CPUS];
+	tux[smp_processor_id()] = some_value;
+	/* task is preempted here... */
+	something = tux[smp_processor_id()];
+
+First, since the data is per-CPU, it may not have explicit SMP locking, but
+require it otherwise.  Second, when a preempted task is finally rescheduled,
+the previous value of smp_processor_id may not equal the current.  You must
+protect these situations by disabling preemption around them.
+
+
+RULE #2: CPU state must be protected.
+
+
+Under preemption, the state of the CPU must be protected.  This is arch-
+dependent, but includes CPU structures and state not preserved over a context
+switch.  For example, on x86, entering and exiting FPU mode is now a critical
+section that must occur while preemption is disabled.  Think what would happen
+if the kernel is executing a floating-point instruction and is then preempted.
+Remember, the kernel does not save FPU state except for user tasks.  Therefore,
+upon preemption, the FPU registers will be sold to the lowest bidder.  Thus,
+preemption must be disabled around such regions.
+
+Note, some FPU functions are already explicitly preempt safe.  For example,
+kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
+However, math_state_restore must be called with preemption disabled.
+
+
+SOLUTION
+
+
+Data protection under preemption is achieved by disabling preemption for the
+duration of the critical region.
+
+preempt_enable()		decrement the preempt counter
+preempt_disable()		increment the preempt counter
+preempt_enable_no_resched()	decrement, but do not immediately preempt
+
+The functions are nestable.  In other words, you can call preempt_disable
+n-times in a code path, and preemption will not be reenabled until the n-th
+call to preempt_enable.  The preempt statements define to nothing if
+preemption is not enabled.
+
+Note that you do not need to explicitly prevent preemption if you are holding
+any locks or interrupts are disabled, since preemption is implicitly disabled
+in those cases.
+
+Example:
+
+	cpucache_t *cc; /* this is per-CPU */
+	preempt_disable();
+	cc = cc_data(searchp);
+	if (cc && cc->avail) {
+		__free_block(searchp, cc_entry(cc), cc->avail);
+		cc->avail = 0;
+	}
+	preempt_enable();
+	return 0;
+
+Notice how the preemption statements must encompass every reference of the
+critical variables.  Another example:
+
+	int buf[NR_CPUS];
+	set_cpu_val(buf);
+	if (buf[smp_processor_id()] == -1) printk(KERN_INFO "wee!\n");
+	spin_lock(&buf_lock);
+	/* ... */
+
+This code is not preempt-safe, but see how easily we can fix it by simply
+moving the spin_lock up two lines.
diff -urN linux-2.4.12/MAINTAINERS linux/MAINTAINERS
--- linux-2.4.12/MAINTAINERS	Sat Oct 20 02:58:01 2001
+++ linux/MAINTAINERS	Mon Nov 12 17:10:38 2001
@@ -1165,6 +1165,14 @@
 M: mostrows@styx.uwaterloo.ca
 S: Maintained
 
+PREEMPTIBLE KERNEL
+P: Robert M.
Love +M: rml@tech9.net +L: linux-kernel@vger.kernel.org +L: kpreempt-tech@lists.sourceforge.net +W: http://tech9.net/rml/linux +S: Maintained + PROMISE DC4030 CACHING DISK CONTROLLER DRIVER P: Peter Denison M: promise@pnd-pc.demon.co.uk diff -urN linux-2.4.12/arch/arm/config.in linux/arch/arm/config.in --- linux-2.4.12/arch/arm/config.in Thu Oct 11 19:54:31 2001 +++ linux/arch/arm/config.in Mon Nov 12 17:10:38 2001 @@ -367,6 +367,7 @@ if [ "$CONFIG_CPU_32" = "y" -a "$CONFIG_ARCH_EBSA110" != "y" ]; then bool 'Kernel-mode alignment trap handler' CONFIG_ALIGNMENT_TRAP fi +dep_bool 'Preemptible Kernel (experimental)' CONFIG_PREEMPT $CONFIG_CPU_32 $CONFIG_EXPERIMENTAL endmenu source drivers/parport/Config.in diff -urN linux-2.4.12/arch/arm/kernel/entry-armv.S linux/arch/arm/kernel/entry-armv.S --- linux-2.4.12/arch/arm/kernel/entry-armv.S Thu Oct 11 19:54:31 2001 +++ linux/arch/arm/kernel/entry-armv.S Mon Nov 12 17:10:38 2001 @@ -649,6 +649,12 @@ add r4, sp, #S_SP mov r6, lr stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro +#ifdef CONFIG_PREEMPT + get_current_task r9 + ldr r8, [r9, #TSK_PREEMPT] + add r8, r8, #1 + str r8, [r9, #TSK_PREEMPT] +#endif 1: get_irqnr_and_base r0, r6, r5, lr movne r1, sp @ @@ -656,6 +662,25 @@ @ adrsvc ne, lr, 1b bne do_IRQ +#ifdef CONFIG_PREEMPT +2: ldr r8, [r9, #TSK_PREEMPT] + subs r8, r8, #1 + bne 3f + ldr r7, [r9, #TSK_NEED_RESCHED] + teq r7, #0 + beq 3f + ldr r6, .LCirqstat + ldr r0, [r6, #IRQSTAT_BH_COUNT] + teq r0, #0 + bne 3f + mov r0, #MODE_SVC + msr cpsr_c, r0 @ enable interrupts + bl SYMBOL_NAME(preempt_schedule) + mov r0, #I_BIT | MODE_SVC + msr cpsr_c, r0 @ disable interrupts + b 2b +3: str r8, [r9, #TSK_PREEMPT] +#endif ldr r0, [sp, #S_PSR] @ irqs are already disabled msr spsr, r0 ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr @@ -713,6 +738,9 @@ .LCprocfns: .word SYMBOL_NAME(processor) #endif .LCfp: .word SYMBOL_NAME(fp_enter) +#ifdef CONFIG_PREEMPT +.LCirqstat: .word SYMBOL_NAME(irq_stat) +#endif irq_prio_table @@ -752,6 +780,12 @@ stmdb r8, {sp, lr}^ alignment_trap r4, r7, __temp_irq zero_fp + get_current_task tsk +#ifdef CONFIG_PREEMPT + ldr r0, [tsk, #TSK_PREEMPT] + add r0, r0, #1 + str r0, [tsk, #TSK_PREEMPT] +#endif 1: get_irqnr_and_base r0, r6, r5, lr movne r1, sp adrsvc ne, lr, 1b @@ -759,8 +793,12 @@ @ routine called with r0 = irq number, r1 = struct pt_regs * @ bne do_IRQ +#ifdef CONFIG_PREEMPT + ldr r0, [tsk, #TSK_PREEMPT] + sub r0, r0, #1 + str r0, [tsk, #TSK_PREEMPT] +#endif mov why, #0 - get_current_task tsk b ret_to_user .align 5 diff -urN linux-2.4.12/arch/arm/tools/getconstants.c linux/arch/arm/tools/getconstants.c --- linux-2.4.12/arch/arm/tools/getconstants.c Thu Oct 11 19:54:32 2001 +++ linux/arch/arm/tools/getconstants.c Mon Nov 12 17:10:38 2001 @@ -13,6 +13,7 @@ #include #include +#include #ifndef __APCS_32__ #error APCS-32 required @@ -33,6 +34,11 @@ DEFN("TSS_SAVE", OFF_TSK(thread.save)); DEFN("TSS_FPESAVE", OFF_TSK(thread.fpstate.soft.save)); +#ifdef CONFIG_PREEMPT +DEFN("TSK_PREEMPT", OFF_TSK(preempt_count)); +DEFN("IRQSTAT_BH_COUNT", (unsigned long)&(((irq_cpustat_t *)0)->__local_bh_count)); +#endif + #ifdef CONFIG_CPU_32 DEFN("TSS_DOMAIN", OFF_TSK(thread.domain)); diff -urN linux-2.4.12/arch/i386/config.in linux/arch/i386/config.in --- linux-2.4.12/arch/i386/config.in Sat Oct 20 02:58:01 2001 +++ linux/arch/i386/config.in Mon Nov 12 17:10:38 2001 @@ -169,6 +169,7 @@ bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing 
support' CONFIG_SMP +bool 'Preemptible Kernel' CONFIG_PREEMPT if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC @@ -182,9 +183,12 @@ bool 'Multiquad NUMA system' CONFIG_MULTIQUAD fi -if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then - define_bool CONFIG_HAVE_DEC_LOCK y +if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then + if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi fi + endmenu mainmenu_option next_comment diff -urN linux-2.4.12/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- linux-2.4.12/arch/i386/kernel/entry.S Sat Oct 20 02:58:01 2001 +++ linux/arch/i386/kernel/entry.S Mon Nov 12 17:10:38 2001 @@ -71,7 +71,7 @@ * these are offsets into the task-struct. */ state = 0 -flags = 4 +preempt_count = 4 sigpending = 8 addr_limit = 12 exec_domain = 16 @@ -79,8 +79,28 @@ tsk_ptrace = 24 processor = 52 + /* These are offsets into the irq_stat structure + * There is one per cpu and it is aligned to 32 + * byte boundry (we put that here as a shift count) + */ +irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT + +irq_stat_local_irq_count = 4 +irq_stat_local_bh_count = 8 + ENOSYS = 38 +#ifdef CONFIG_SMP +#define GET_CPU_INDX movl processor(%ebx),%eax; \ + shll $irq_array_shift,%eax +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \ + GET_CPU_INDX +#define CPU_INDX (,%eax) +#else +#define GET_CPU_INDX +#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx) +#define CPU_INDX +#endif #define SAVE_ALL \ cld; \ @@ -247,12 +267,30 @@ ALIGN ENTRY(ret_from_intr) GET_CURRENT(%ebx) +#ifdef CONFIG_PREEMPT + cli + decl preempt_count(%ebx) +#endif ret_from_exception: movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
jne ret_from_sys_call +#ifdef CONFIG_PREEMPT + cmpl $0,preempt_count(%ebx) + jnz restore_all + cmpl $0,need_resched(%ebx) + jz restore_all + movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx + addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx + jnz restore_all + incl preempt_count(%ebx) + sti + call SYMBOL_NAME(preempt_schedule) + jmp ret_from_intr +#else jmp restore_all +#endif ALIGN reschedule: @@ -289,6 +327,9 @@ GET_CURRENT(%ebx) call *%edi addl $8,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(coprocessor_error) @@ -308,12 +349,18 @@ movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) jne device_not_available_emulate +#ifdef CONFIG_PREEMPT + cli +#endif call SYMBOL_NAME(math_state_restore) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP call SYMBOL_NAME(math_emulate) addl $4,%esp +#ifdef CONFIG_PREEMPT + cli +#endif jmp ret_from_exception ENTRY(debug) diff -urN linux-2.4.12/arch/i386/kernel/i387.c linux/arch/i386/kernel/i387.c --- linux-2.4.12/arch/i386/kernel/i387.c Sat Oct 20 02:58:01 2001 +++ linux/arch/i386/kernel/i387.c Mon Nov 12 17:10:38 2001 @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -65,6 +66,8 @@ { struct task_struct *tsk = current; + preempt_disable(); + if (tsk->flags & PF_USEDFPU) { __save_init_fpu(tsk); return; diff -urN linux-2.4.12/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- linux-2.4.12/arch/i386/kernel/traps.c Sat Oct 20 02:58:01 2001 +++ linux/arch/i386/kernel/traps.c Mon Nov 12 17:10:38 2001 @@ -697,6 +697,11 @@ */ asmlinkage void math_state_restore(struct pt_regs regs) { + /* + * CONFIG_PREEMPT + * Must be called with preemption disabled + */ + __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ if (current->used_math) { diff -urN linux-2.4.12/arch/i386/lib/dec_and_lock.c linux/arch/i386/lib/dec_and_lock.c --- linux-2.4.12/arch/i386/lib/dec_and_lock.c Sat Oct 20 02:58:01 2001 +++ linux/arch/i386/lib/dec_and_lock.c Mon Nov 12 17:10:38 2001 @@ -8,6 +8,7 @@ */ #include +#include #include int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) diff -urN linux-2.4.12/drivers/char/console.c linux/drivers/char/console.c --- linux-2.4.12/drivers/char/console.c Thu Oct 11 19:54:11 2001 +++ linux/drivers/char/console.c Mon Nov 12 17:10:38 2001 @@ -2348,8 +2348,14 @@ return; pm_access(pm_con); + + /* + * If we raced with con_close(), `vt' may be null. + * Hence this bandaid. 
- akpm + */ acquire_console_sem(); - set_cursor(vt->vc_num); + if (vt) + set_cursor(vt->vc_num); release_console_sem(); } diff -urN linux-2.4.12/drivers/ieee1394/csr.c linux/drivers/ieee1394/csr.c --- linux-2.4.12/drivers/ieee1394/csr.c Sat Oct 20 02:58:01 2001 +++ linux/drivers/ieee1394/csr.c Mon Nov 12 17:10:38 2001 @@ -10,6 +10,7 @@ */ #include +#include #include "ieee1394_types.h" #include "hosts.h" diff -urN linux-2.4.12/fs/adfs/map.c linux/fs/adfs/map.c --- linux-2.4.12/fs/adfs/map.c Sat Oct 20 02:58:01 2001 +++ linux/fs/adfs/map.c Mon Nov 12 17:10:38 2001 @@ -12,6 +12,7 @@ #include #include #include +#include #include "adfs.h" diff -urN linux-2.4.12/fs/exec.c linux/fs/exec.c --- linux-2.4.12/fs/exec.c Sat Oct 20 02:58:01 2001 +++ linux/fs/exec.c Mon Nov 12 17:10:38 2001 @@ -419,8 +419,8 @@ active_mm = current->active_mm; current->mm = mm; current->active_mm = mm; - task_unlock(current); activate_mm(active_mm, mm); + task_unlock(current); mm_release(); if (old_mm) { if (active_mm != old_mm) BUG(); diff -urN linux-2.4.12/fs/fat/cache.c linux/fs/fat/cache.c --- linux-2.4.12/fs/fat/cache.c Sat Oct 20 02:58:01 2001 +++ linux/fs/fat/cache.c Mon Nov 12 17:10:38 2001 @@ -14,6 +14,7 @@ #include #include #include +#include #include "msbuffer.h" diff -urN linux-2.4.12/include/asm-arm/dma.h linux/include/asm-arm/dma.h --- linux-2.4.12/include/asm-arm/dma.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/dma.h Mon Nov 12 17:10:38 2001 @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff -urN linux-2.4.12/include/asm-arm/hardirq.h linux/include/asm-arm/hardirq.h --- linux-2.4.12/include/asm-arm/hardirq.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/hardirq.h Mon Nov 12 17:10:38 2001 @@ -33,6 +33,7 @@ #define irq_exit(cpu,irq) (local_irq_count(cpu)--) #define synchronize_irq() do { } while (0) +#define release_irqlock(cpu) do { } while (0) #else #error SMP not supported diff -urN linux-2.4.12/include/asm-arm/mmu_context.h linux/include/asm-arm/mmu_context.h --- linux-2.4.12/include/asm-arm/mmu_context.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/mmu_context.h Mon Nov 12 17:10:38 2001 @@ -42,6 +42,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned int cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_is_disable() == 0) + BUG(); +#endif if (prev != next) { cpu_switch_mm(next->pgd, tsk); clear_bit(cpu, &prev->cpu_vm_mask); diff -urN linux-2.4.12/include/asm-arm/pgalloc.h linux/include/asm-arm/pgalloc.h --- linux-2.4.12/include/asm-arm/pgalloc.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/pgalloc.h Mon Nov 12 17:10:38 2001 @@ -57,40 +57,48 @@ { unsigned long *ret; + preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)__pgd_next(ret); ret[1] = ret[2]; clean_dcache_entry(ret + 1); pgtable_cache_size--; } + preempt_enable(); return (pgd_t *)ret; } static inline void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); __pgd_next(pgd) = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) { unsigned long *ret; + preempt_disable(); if((ret = pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)__pte_next(ret); ret[0] = 0; clean_dcache_entry(ret); pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static inline void free_pte_fast(pte_t *pte) { + preempt_disable(); __pte_next(pte) = (unsigned long) pte_quicklist; 
pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } #else /* CONFIG_NO_PGT_CACHE */ diff -urN linux-2.4.12/include/asm-arm/smplock.h linux/include/asm-arm/smplock.h --- linux-2.4.12/include/asm-arm/smplock.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/smplock.h Mon Nov 12 17:10:38 2001 @@ -3,12 +3,17 @@ * * Default SMP lock implementation */ +#include #include #include extern spinlock_t kernel_flag; +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_is_disable() +#else #define kernel_locked() spin_is_locked(&kernel_flag) +#endif /* * Release global kernel lock and global interrupt lock @@ -40,8 +45,14 @@ */ static inline void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else if (!++current->lock_depth) spin_lock(&kernel_flag); +#endif } static inline void unlock_kernel(void) diff -urN linux-2.4.12/include/asm-arm/softirq.h linux/include/asm-arm/softirq.h --- linux-2.4.12/include/asm-arm/softirq.h Thu Oct 11 19:54:01 2001 +++ linux/include/asm-arm/softirq.h Mon Nov 12 17:10:38 2001 @@ -5,20 +5,22 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) #define in_softirq() (local_bh_count(smp_processor_id()) != 0) -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ if (!--*ptr && ptr[-2]) \ __asm__("bl%? 
__do_softirq": : : "lr");/* out of line */\ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.12/include/asm-i386/hardirq.h linux/include/asm-i386/hardirq.h --- linux-2.4.12/include/asm-i386/hardirq.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/hardirq.h Mon Nov 12 17:10:38 2001 @@ -36,6 +36,8 @@ #define synchronize_irq() barrier() +#define release_irqlock(cpu) do { } while (0) + #else #include diff -urN linux-2.4.12/include/asm-i386/highmem.h linux/include/asm-i386/highmem.h --- linux-2.4.12/include/asm-i386/highmem.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/highmem.h Mon Nov 12 17:10:38 2001 @@ -88,6 +88,7 @@ enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); if (page < highmem_start_page) return page_address(page); @@ -109,8 +110,10 @@ unsigned long vaddr = (unsigned long) kvaddr; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) // FIXME + if (vaddr < FIXADDR_START) { // FIXME + preempt_enable(); return; + } if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) BUG(); @@ -122,6 +125,8 @@ pte_clear(kmap_pte-idx); __flush_tlb_one(vaddr); #endif + + preempt_enable(); } #endif /* __KERNEL__ */ diff -urN linux-2.4.12/include/asm-i386/hw_irq.h linux/include/asm-i386/hw_irq.h --- linux-2.4.12/include/asm-i386/hw_irq.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/hw_irq.h Mon Nov 12 17:10:38 2001 @@ -95,6 +95,18 @@ #define __STR(x) #x #define STR(x) __STR(x) +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +#ifdef CONFIG_PREEMPT +#define BUMP_CONTEX_SWITCH_LOCK \ + GET_CURRENT \ + "incl 4(%ebx)\n\t" +#else +#define BUMP_CONTEX_SWITCH_LOCK +#endif + #define SAVE_ALL \ "cld\n\t" \ "pushl %es\n\t" \ @@ -108,15 +120,12 @@ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ "movl %edx,%ds\n\t" \ - "movl %edx,%es\n\t" + "movl %edx,%es\n\t" \ + BUMP_CONTEX_SWITCH_LOCK #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define GET_CURRENT \ - "movl %esp, %ebx\n\t" \ - "andl $-8192, %ebx\n\t" - /* * SMP has a few special interrupts for IPI messages */ diff -urN linux-2.4.12/include/asm-i386/i387.h linux/include/asm-i386/i387.h --- linux-2.4.12/include/asm-i386/i387.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/i387.h Mon Nov 12 17:10:38 2001 @@ -12,6 +12,7 @@ #define __ASM_I386_I387_H #include +#include #include #include #include @@ -24,7 +25,7 @@ extern void restore_fpu( struct task_struct *tsk ); extern void kernel_fpu_begin(void); -#define kernel_fpu_end() stts() +#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0) #define unlazy_fpu( tsk ) do { \ diff -urN linux-2.4.12/include/asm-i386/mmu_context.h linux/include/asm-i386/mmu_context.h --- linux-2.4.12/include/asm-i386/mmu_context.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/mmu_context.h Mon Nov 12 17:10:38 2001 @@ -27,6 +27,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) { +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() == 0) + BUG(); +#endif if (prev != next) { /* stop flush ipis for the previous mm */ clear_bit(cpu, &prev->cpu_vm_mask); diff -urN linux-2.4.12/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- linux-2.4.12/include/asm-i386/pgalloc.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/pgalloc.h Mon Nov 12 17:10:38 2001 @@ -65,20 +65,26 @@ { unsigned long *ret; + 
preempt_disable(); if ((ret = pgd_quicklist) != NULL) { pgd_quicklist = (unsigned long *)(*ret); ret[0] = 0; pgtable_cache_size--; - } else + preempt_enable(); + } else { + preempt_enable(); ret = (unsigned long *)get_pgd_slow(); + } return (pgd_t *)ret; } static __inline__ void free_pgd_fast(pgd_t *pgd) { + preempt_disable(); *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; + preempt_enable(); } static __inline__ void free_pgd_slow(pgd_t *pgd) @@ -108,19 +114,23 @@ { unsigned long *ret; + preempt_disable(); if ((ret = (unsigned long *)pte_quicklist) != NULL) { pte_quicklist = (unsigned long *)(*ret); ret[0] = ret[1]; pgtable_cache_size--; } + preempt_enable(); return (pte_t *)ret; } static __inline__ void pte_free_fast(pte_t *pte) { + preempt_disable(); *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; pgtable_cache_size++; + preempt_enable(); } static __inline__ void pte_free_slow(pte_t *pte) diff -urN linux-2.4.12/include/asm-i386/processor.h linux/include/asm-i386/processor.h --- linux-2.4.12/include/asm-i386/processor.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/processor.h Mon Nov 12 17:10:38 2001 @@ -502,7 +502,10 @@ { __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); } -#define spin_lock_prefetch(x) prefetchw(x) +#define spin_lock_prefetch(x) do { \ + prefetchw(x); \ + preempt_prefetch(¤t->preempt_count); \ +} while(0) #endif diff -urN linux-2.4.12/include/asm-i386/smplock.h linux/include/asm-i386/smplock.h --- linux-2.4.12/include/asm-i386/smplock.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/smplock.h Mon Nov 12 17:10:38 2001 @@ -10,7 +10,15 @@ extern spinlock_t kernel_flag; +#ifdef CONFIG_SMP #define kernel_locked() spin_is_locked(&kernel_flag) +#else +#ifdef CONFIG_PREEMPT +#define kernel_locked() preempt_is_disabled() +#else +#define kernel_locked() 1 +#endif +#endif /* * Release global kernel lock and global interrupt lock @@ -42,6 +50,11 @@ */ static __inline__ void lock_kernel(void) { +#ifdef CONFIG_PREEMPT + if (current->lock_depth == -1) + spin_lock(&kernel_flag); + ++current->lock_depth; +#else #if 1 if (!++current->lock_depth) spin_lock(&kernel_flag); @@ -54,6 +67,7 @@ :"=m" (__dummy_lock(&kernel_flag)), "=m" (current->lock_depth)); #endif +#endif } static __inline__ void unlock_kernel(void) diff -urN linux-2.4.12/include/asm-i386/softirq.h linux/include/asm-i386/softirq.h --- linux-2.4.12/include/asm-i386/softirq.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/softirq.h Mon Nov 12 17:10:38 2001 @@ -5,9 +5,9 @@ #include #define __cpu_bh_enable(cpu) \ - do { barrier(); local_bh_count(cpu)--; } while (0) + do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0) #define cpu_bh_disable(cpu) \ - do { local_bh_count(cpu)++; barrier(); } while (0) + do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) @@ -22,7 +22,7 @@ * If you change the offsets in irq_stat then you have to * update this code as well. 
*/ -#define local_bh_enable() \ +#define _local_bh_enable() \ do { \ unsigned int *ptr = &local_bh_count(smp_processor_id()); \ \ @@ -45,4 +45,6 @@ /* no registers clobbered */ ); \ } while (0) +#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0) + #endif /* __ASM_SOFTIRQ_H */ diff -urN linux-2.4.12/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h --- linux-2.4.12/include/asm-i386/spinlock.h Sat Oct 20 02:58:01 2001 +++ linux/include/asm-i386/spinlock.h Mon Nov 12 17:10:38 2001 @@ -70,7 +70,7 @@ #define spin_unlock_string \ "movb $1,%0" -static inline int spin_trylock(spinlock_t *lock) +static inline int _raw_spin_trylock(spinlock_t *lock) { char oldval; __asm__ __volatile__( @@ -80,7 +80,7 @@ return oldval > 0; } -static inline void spin_lock(spinlock_t *lock) +static inline void _raw_spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -95,7 +95,7 @@ :"=m" (lock->lock) : : "memory"); } -static inline void spin_unlock(spinlock_t *lock) +static inline void _raw_spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -148,7 +148,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.c */ -static inline void read_lock(rwlock_t *rw) +static inline void _raw_read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -157,7 +157,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -static inline void write_lock(rwlock_t *rw) +static inline void _raw_write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -166,10 +166,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -static inline int write_trylock(rwlock_t *lock) +static inline int _raw_write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN linux-2.4.12/include/linux/brlock.h linux/include/linux/brlock.h --- linux-2.4.12/include/linux/brlock.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/brlock.h Mon Nov 12 17:10:38 2001 @@ -171,11 +171,11 @@ } #else -# define br_read_lock(idx) ((void)(idx)) -# define br_read_unlock(idx) ((void)(idx)) -# define br_write_lock(idx) ((void)(idx)) -# define br_write_unlock(idx) ((void)(idx)) -#endif +# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); }) +# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); }) +# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); }) +#endif /* CONFIG_SMP */ /* * Now enumerate all of the possible sw/hw IRQ protected diff -urN linux-2.4.12/include/linux/dcache.h linux/include/linux/dcache.h --- linux-2.4.12/include/linux/dcache.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/dcache.h Mon Nov 12 17:10:38 2001 @@ -126,31 +126,6 @@ extern spinlock_t dcache_lock; -/** - * d_drop - drop a dentry - * @dentry: dentry to drop - * - * d_drop() unhashes the entry from the parent - * dentry hashes, so that it won't be found through - * a VFS lookup any more. 
Note that this is different - * from deleting the dentry - d_delete will try to - * mark the dentry negative if possible, giving a - * successful _negative_ lookup, while d_drop will - * just make the cache lookup fail. - * - * d_drop() is used mainly for stuff that wants - * to invalidate a dentry for some reason (NFS - * timeouts or autofs deletes). - */ - -static __inline__ void d_drop(struct dentry * dentry) -{ - spin_lock(&dcache_lock); - list_del(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_hash); - spin_unlock(&dcache_lock); -} - static __inline__ int dname_external(struct dentry *d) { return d->d_name.name != d->d_iname; @@ -275,3 +250,34 @@ #endif /* __KERNEL__ */ #endif /* __LINUX_DCACHE_H */ + +#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define __LINUX_DCACHE_H_INLINES + +#ifdef __KERNEL__ +/** + * d_drop - drop a dentry + * @dentry: dentry to drop + * + * d_drop() unhashes the entry from the parent + * dentry hashes, so that it won't be found through + * a VFS lookup any more. Note that this is different + * from deleting the dentry - d_delete will try to + * mark the dentry negative if possible, giving a + * successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * + * d_drop() is used mainly for stuff that wants + * to invalidate a dentry for some reason (NFS + * timeouts or autofs deletes). + */ + +static __inline__ void d_drop(struct dentry * dentry) +{ + spin_lock(&dcache_lock); + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); + spin_unlock(&dcache_lock); +} +#endif +#endif diff -urN linux-2.4.12/include/linux/fs_struct.h linux/include/linux/fs_struct.h --- linux-2.4.12/include/linux/fs_struct.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/fs_struct.h Mon Nov 12 17:10:38 2001 @@ -20,6 +20,15 @@ extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); +struct fs_struct *copy_fs_struct(struct fs_struct *old); +void put_fs_struct(struct fs_struct *fs); + +#endif +#endif + +#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_FS_STRUCT_H_INLINES +#ifdef __KERNEL__ /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. Requires the big lock held. 
@@ -65,9 +74,5 @@ mntput(old_pwdmnt); } } - -struct fs_struct *copy_fs_struct(struct fs_struct *old); -void put_fs_struct(struct fs_struct *fs); - #endif #endif diff -urN linux-2.4.12/include/linux/sched.h linux/include/linux/sched.h --- linux-2.4.12/include/linux/sched.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/sched.h Mon Nov 12 17:10:38 2001 @@ -88,6 +88,7 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#define PREEMPT_ACTIVE 0x40000000 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -154,6 +155,9 @@ #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); +#endif extern int schedule_task(struct tq_struct *task); extern void flush_scheduled_tasks(void); @@ -283,7 +287,17 @@ * offsets of these are hardcoded elsewhere - touch with care */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - unsigned long flags; /* per process flags, defined below */ + /* + * We want the preempt_count in this cache line, but we + * a) don't want to mess up the offsets in asm code, and + * b) the alignment of the next line below, + * so we move "flags" down + * + * Also note we don't make preempt_count volatile, but we do + * need to make sure it is never hiding in a register when + * we have an interrupt, so we need to use barrier() + */ + int preempt_count; /* 0=> preemptable, < 0 => BUG */ int sigpending; mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead @@ -317,6 +331,7 @@ struct mm_struct *active_mm; struct list_head local_pages; unsigned int allocation_order, nr_local_pages; + unsigned long flags; /* task state */ struct linux_binfmt *binfmt; @@ -899,6 +914,11 @@ return res; } +#define _TASK_STRUCT_DEFINED +#include +#include +#include + #endif /* __KERNEL__ */ #endif diff -urN linux-2.4.12/include/linux/smp.h linux/include/linux/smp.h --- linux-2.4.12/include/linux/smp.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/smp.h Mon Nov 12 17:10:38 2001 @@ -81,7 +81,9 @@ #define smp_processor_id() 0 #define hard_smp_processor_id() 0 #define smp_threads_ready 1 +#ifndef CONFIG_PREEMPT #define kernel_lock() +#endif #define cpu_logical_map(cpu) 0 #define cpu_number_map(cpu) 0 #define smp_call_function(func,info,retry,wait) ({ 0; }) diff -urN linux-2.4.12/include/linux/smp_lock.h linux/include/linux/smp_lock.h --- linux-2.4.12/include/linux/smp_lock.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/smp_lock.h Mon Nov 12 17:10:38 2001 @@ -3,7 +3,7 @@ #include -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT) #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) diff -urN linux-2.4.12/include/linux/spinlock.h linux/include/linux/spinlock.h --- linux-2.4.12/include/linux/spinlock.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/spinlock.h Mon Nov 12 17:10:38 2001 @@ -41,8 +41,10 @@ #if (DEBUG_SPINLOCKS < 1) +#ifndef CONFIG_PREEMPT #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) #define ATOMIC_DEC_AND_LOCK +#endif /* * Your basic spinlocks, allowing only a single CPU anywhere @@ -58,11 +60,11 @@ #endif #define spin_lock_init(lock) do { } while(0) -#define spin_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". 
*/ #define spin_is_locked(lock) (0) -#define spin_trylock(lock) ({1; }) +#define _raw_spin_trylock(lock) ({1; }) #define spin_unlock_wait(lock) do { } while(0) -#define spin_unlock(lock) do { } while(0) +#define _raw_spin_unlock(lock) do { } while(0) #elif (DEBUG_SPINLOCKS < 2) @@ -121,13 +123,77 @@ #endif #define rwlock_init(lock) do { } while(0) -#define read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define read_unlock(lock) do { } while(0) -#define write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define write_unlock(lock) do { } while(0) +#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_read_unlock(lock) do { } while(0) +#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ +#define _raw_write_unlock(lock) do { } while(0) #endif /* !SMP */ +#ifdef CONFIG_PREEMPT + +#define preempt_is_disabled() (current->preempt_count) +#define preempt_prefetch(a) prefetchw(a) + +#define preempt_disable() \ +do { \ + ++current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched() \ +do { \ + --current->preempt_count; \ + barrier(); \ +} while (0) + +#define preempt_enable() \ +do { \ + --current->preempt_count; \ + barrier(); \ + if ((current->preempt_count == 0) && current->need_resched) \ + preempt_schedule(); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _raw_spin_lock(lock); \ +} while(0) +#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);}) +#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);}) +#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable(); _raw_write_trylock(lock) ? \ + 1 : ({preempt_enable(); 0;});}) + +#else + +#define preempt_is_disabled() do { } while (0) +#define preempt_disable() do { } while (0) +#define preempt_enable_no_resched() +#define preempt_enable() do { } while (0) +#define preempt_prefetch(a) + +#define spin_lock(lock) _raw_spin_lock(lock) +#define spin_trylock(lock) _raw_spin_trylock(lock) +#define spin_unlock(lock) _raw_spin_unlock(lock) + +#define read_lock(lock) _raw_read_lock(lock) +#define read_unlock(lock) _raw_read_unlock(lock) +#define write_lock(lock) _raw_write_lock(lock) +#define write_unlock(lock) _raw_write_unlock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) +#endif + /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK #include diff -urN linux-2.4.12/include/linux/tqueue.h linux/include/linux/tqueue.h --- linux-2.4.12/include/linux/tqueue.h Sat Oct 20 02:58:01 2001 +++ linux/include/linux/tqueue.h Mon Nov 12 17:10:38 2001 @@ -94,6 +94,22 @@ extern spinlock_t tqueue_lock; /* + * Call all "bottom halfs" on a given list. + */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ + +#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED) +#define _LINUX_TQUEUE_H_INLINES +/* * Queue a task on a tq. Return non-zero if it was successfully * added. */ @@ -109,17 +125,4 @@ } return ret; } - -/* - * Call all "bottom halfs" on a given list. 
- */ - -extern void __run_task_queue(task_queue *list); - -static inline void run_task_queue(task_queue *list) -{ - if (TQ_ACTIVE(*list)) - __run_task_queue(list); -} - -#endif /* _LINUX_TQUEUE_H */ +#endif diff -urN linux-2.4.12/kernel/exit.c linux/kernel/exit.c --- linux-2.4.12/kernel/exit.c Sat Oct 20 02:58:01 2001 +++ linux/kernel/exit.c Mon Nov 12 17:10:38 2001 @@ -279,6 +279,10 @@ struct mm_struct * start_lazy_tlb(void) { struct mm_struct *mm = current->mm; +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() == 0) + BUG(); +#endif current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); @@ -290,6 +294,10 @@ { struct mm_struct *active_mm = current->active_mm; +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() == 0) + BUG(); +#endif current->mm = mm; if (mm != active_mm) { current->active_mm = mm; @@ -313,8 +321,8 @@ /* more a memory barrier than a real lock */ task_lock(tsk); tsk->mm = NULL; - task_unlock(tsk); enter_lazy_tlb(mm, current, smp_processor_id()); + task_unlock(tsk); mmput(mm); } } diff -urN linux-2.4.12/kernel/fork.c linux/kernel/fork.c --- linux-2.4.12/kernel/fork.c Sat Oct 20 02:58:01 2001 +++ linux/kernel/fork.c Mon Nov 12 17:10:38 2001 @@ -604,6 +604,12 @@ if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); +#ifdef CONFIG_PREEMPT + /* Since we are keeping the context switch off state as part + * of the context, make sure we start with it off. + */ + p->preempt_count = 1; +#endif p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; diff -urN linux-2.4.12/kernel/ksyms.c linux/kernel/ksyms.c --- linux-2.4.12/kernel/ksyms.c Sat Oct 20 02:58:01 2001 +++ linux/kernel/ksyms.c Mon Nov 12 17:10:38 2001 @@ -431,6 +431,9 @@ EXPORT_SYMBOL(interruptible_sleep_on); EXPORT_SYMBOL(interruptible_sleep_on_timeout); EXPORT_SYMBOL(schedule); +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(preempt_schedule); +#endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); diff -urN linux-2.4.12/kernel/sched.c linux/kernel/sched.c --- linux-2.4.12/kernel/sched.c Sat Oct 20 02:58:01 2001 +++ linux/kernel/sched.c Mon Nov 12 17:10:38 2001 @@ -475,7 +475,7 @@ task_lock(prev); prev->has_cpu = 0; mb(); - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto needs_resched; out_unlock: @@ -505,7 +505,7 @@ goto out_unlock; spin_lock_irqsave(&runqueue_lock, flags); - if ((prev->state == TASK_RUNNING) && !prev->has_cpu) + if (task_on_runqueue(prev) && !prev->has_cpu) reschedule_idle(prev); spin_unlock_irqrestore(&runqueue_lock, flags); goto out_unlock; @@ -518,6 +518,7 @@ void schedule_tail(struct task_struct *prev) { __schedule_tail(prev); + preempt_enable(); } /* @@ -540,6 +541,8 @@ spin_lock_prefetch(&runqueue_lock); + preempt_disable(); + if (!current->active_mm) BUG(); need_resched_back: prev = current; @@ -563,6 +566,9 @@ goto move_rr_last; move_rr_back: +#ifdef CONFIG_PREEMPT + if (preempt_is_disabled() & PREEMPT_ACTIVE) goto treat_like_run; +#endif switch (prev->state) { case TASK_INTERRUPTIBLE: if (signal_pending(prev)) { @@ -573,6 +579,9 @@ del_from_runqueue(prev); case TASK_RUNNING:; } +#ifdef CONFIG_PREEMPT + treat_like_run: +#endif prev->need_resched = 0; /* @@ -585,7 +594,7 @@ */ next = idle_task(this_cpu); c = -1000; - if (prev->state == TASK_RUNNING) + if (task_on_runqueue(prev)) goto still_running; still_running_back: @@ -678,7 +687,7 @@ reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; - + preempt_enable_no_resched(); return; recalculate: @@ -988,6 +997,34 @@ return 
setscheduler(pid, -1, param); } +#ifdef CONFIG_PREEMPT + +#ifdef CONFIG_SMP +#define lock_to_this_cpu() \ + unsigned long old_cpus_allowed = current->cpus_allowed; \ + current->cpus_allowed = 1UL << smp_processor_id() +#define restore_cpus_allowed() current->cpus_allowed = old_cpus_allowed +#else +#define lock_to_this_cpu() +#define restore_cpus_allowed() +#endif /* !CONFIG_SMP */ + +asmlinkage void preempt_schedule(void) +{ + while (current->need_resched) { + /* it would be ideal not to lock tasks to their cpu here, + * but only around the data that needs such locking */ + lock_to_this_cpu(); + current->preempt_count += PREEMPT_ACTIVE + 1; + barrier(); + schedule(); + current->preempt_count -= PREEMPT_ACTIVE + 1; + barrier(); + restore_cpus_allowed(); + } +} +#endif /* CONFIG_PREEMPT */ + asmlinkage long sys_sched_getscheduler(pid_t pid) { struct task_struct *p; diff -urN linux-2.4.12/lib/dec_and_lock.c linux/lib/dec_and_lock.c --- linux-2.4.12/lib/dec_and_lock.c Sat Oct 20 02:58:01 2001 +++ linux/lib/dec_and_lock.c Mon Nov 12 17:10:38 2001 @@ -1,5 +1,6 @@ #include #include +#include #include /* diff -urN linux-2.4.12/mm/slab.c linux/mm/slab.c --- linux-2.4.12/mm/slab.c Thu Oct 11 19:53:59 2001 +++ linux/mm/slab.c Mon Nov 12 17:10:38 2001 @@ -49,7 +49,9 @@ * constructors and destructors are called without any locking. * Several members in kmem_cache_t and slab_t never change, they * are accessed without any locking. - * The per-cpu arrays are never accessed from the wrong cpu, no locking. + * The per-cpu arrays are never accessed from the wrong cpu, no locking, + * they are however called with local interrupts disabled so no + * preempt_disable needed. * The non-constant members are protected with a per-cache irq spinlock. * * Further notes from the original documentation: diff -urN linux-2.4.12/net/socket.c linux/net/socket.c --- linux-2.4.12/net/socket.c Sat Oct 20 02:58:01 2001 +++ linux/net/socket.c Mon Nov 12 17:10:38 2001 @@ -135,7 +135,7 @@ static struct net_proto_family *net_families[NPROTO]; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t net_family_lockct = ATOMIC_INIT(0); static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;