diff -urN 2.4.0-test9-pre5/arch/alpha/Makefile z/arch/alpha/Makefile --- 2.4.0-test9-pre5/arch/alpha/Makefile Tue Sep 12 02:32:45 2000 +++ z/arch/alpha/Makefile Fri Sep 22 17:47:44 2000 @@ -119,6 +119,10 @@ archdep: @$(MAKEBOOT) dep + +vmlinux: arch/alpha/vmlinux.lds + +arch/alpha/vmlinux.lds: arch/alpha/vmlinux.lds.in $(CPP) $(CPPFLAGS) -xc -P arch/alpha/vmlinux.lds.in -o arch/alpha/vmlinux.lds bootpfile: diff -urN 2.4.0-test9-pre5/arch/alpha/kernel/pci_iommu.c z/arch/alpha/kernel/pci_iommu.c --- 2.4.0-test9-pre5/arch/alpha/kernel/pci_iommu.c Sat Jun 24 16:02:27 2000 +++ z/arch/alpha/kernel/pci_iommu.c Fri Sep 22 17:47:44 2000 @@ -416,7 +416,9 @@ ptes = &arena->ptes[dma_ofs]; sg = leader; do { +#if DEBUG_ALLOC > 0 struct scatterlist *last_sg = sg; +#endif size = sg->length; paddr = virt_to_phys(sg->address); diff -urN 2.4.0-test9-pre5/arch/alpha/kernel/smp.c z/arch/alpha/kernel/smp.c --- 2.4.0-test9-pre5/arch/alpha/kernel/smp.c Tue Sep 12 02:32:46 2000 +++ z/arch/alpha/kernel/smp.c Fri Sep 22 17:47:44 2000 @@ -1046,8 +1046,8 @@ " blbs %0,2b\n" " br 1b\n" ".previous" - : "=r" (tmp), "=m" (__dummy_lock(lock)), "=r" (stuck) - : "1" (__dummy_lock(lock)), "2" (stuck)); + : "=r" (tmp), "=m" (lock->lock), "=r" (stuck) + : "1" (lock->lock), "2" (stuck) : "memory"); if (stuck < 0) { printk(KERN_WARNING @@ -1124,9 +1124,9 @@ " blt %1,8b\n" " br 1b\n" ".previous" - : "=m" (__dummy_lock(lock)), "=&r" (regx), "=&r" (regy), + : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (regy), "=&r" (stuck_lock), "=&r" (stuck_reader) - : "0" (__dummy_lock(lock)), "3" (stuck_lock), "4" (stuck_reader)); + : "0" (*(volatile int *)lock), "3" (stuck_lock), "4" (stuck_reader) : "memory"); if (stuck_lock < 0) { printk(KERN_WARNING "write_lock stuck at %p\n", inline_pc); @@ -1163,8 +1163,8 @@ " blbs %1,6b;" " br 1b\n" ".previous" - : "=m" (__dummy_lock(lock)), "=&r" (regx), "=&r" (stuck_lock) - : "0" (__dummy_lock(lock)), "2" (stuck_lock)); + : "=m" (*(volatile int *)lock), "=&r" (regx), "=&r" (stuck_lock) + : "0" (*(volatile int *)lock), "2" (stuck_lock) : "memory"); if (stuck_lock < 0) { printk(KERN_WARNING "read_lock stuck at %p\n", inline_pc); diff -urN 2.4.0-test9-pre5/arch/alpha/mm/extable.c z/arch/alpha/mm/extable.c --- 2.4.0-test9-pre5/arch/alpha/mm/extable.c Sat Jun 24 16:02:27 2000 +++ z/arch/alpha/mm/extable.c Fri Sep 22 17:47:44 2000 @@ -88,7 +88,7 @@ */ ret = search_exception_table_without_gp(addr); if (ret) { - printk(KERN_ALERT, "%s: [%lx] EX_TABLE search fail with" + printk(KERN_ALERT "%s: [%lx] EX_TABLE search fail with" "exc frame GP, success with raw GP\n", current->comm, addr); return ret; diff -urN 2.4.0-test9-pre5/include/asm-alpha/atomic.h z/include/asm-alpha/atomic.h --- 2.4.0-test9-pre5/include/asm-alpha/atomic.h Sun Sep 3 23:48:31 2000 +++ z/include/asm-alpha/atomic.h Fri Sep 22 17:47:44 2000 @@ -11,11 +11,13 @@ * than regular operations. */ -#ifdef CONFIG_SMP + +/* + * Counter is volatile to make sure gcc doesn't try to be clever + * and move things around on us. We need to use _exactly_ the address + * the user gave us, not some alias that contains the same information. + */ typedef struct { volatile int counter; } atomic_t; -#else -typedef struct { int counter; } atomic_t; -#endif #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) @@ -23,19 +25,12 @@ #define atomic_set(v,i) ((v)->counter = (i)) /* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. 
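
The recurring change in the hunks above (and throughout the patch) is mechanical: instead of telling gcc that a large fake object changed via the __dummy_lock() / __atomic_fool_gcc() struct cast, the asm now names the real word it touches and adds an explicit "memory" clobber. A minimal sketch of the two constraint styles on a hypothetical lock type (the asm template is deliberately empty; only the constraints matter here):

typedef struct { volatile unsigned int lock; } sketch_lock_t;

static inline void sketch_touch_lock(sketch_lock_t *lock)
{
        /* old style: "=m" (*(struct { int a[100]; } *)lock) claimed that some
         * big aliasing object may change, so gcc could not cache anything
         * that might live inside it */
        /* new style: name the real lvalue, and let the "memory" clobber make
         * gcc flush and reload memory values around the asm */
        __asm__ __volatile__(
                ""                              /* real lock code goes here */
                : "=m" (lock->lock)
                : "m" (lock->lock)
                : "memory");
}
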
- */ -#define __atomic_fool_gcc(x) (*(struct { int a[100]; } *)x) - -/* * To get proper branch prediction for the main line, we must branch * forward to code at the end of this object's .text section, then * branch back to restart the operation. */ -extern __inline__ void atomic_add(int i, atomic_t * v) +static __inline__ void atomic_add(int i, atomic_t * v) { unsigned long temp; __asm__ __volatile__( @@ -46,11 +41,11 @@ ".subsection 2\n" "2: br 1b\n" ".previous" - :"=&r" (temp), "=m" (__atomic_fool_gcc(v)) - :"Ir" (i), "m" (__atomic_fool_gcc(v))); + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); } -extern __inline__ void atomic_sub(int i, atomic_t * v) +static __inline__ void atomic_sub(int i, atomic_t * v) { unsigned long temp; __asm__ __volatile__( @@ -61,14 +56,14 @@ ".subsection 2\n" "2: br 1b\n" ".previous" - :"=&r" (temp), "=m" (__atomic_fool_gcc(v)) - :"Ir" (i), "m" (__atomic_fool_gcc(v))); + :"=&r" (temp), "=m" (v->counter) + :"Ir" (i), "m" (v->counter)); } /* * Same as above, but return the result value */ -extern __inline__ long atomic_add_return(int i, atomic_t * v) +static __inline__ long atomic_add_return(int i, atomic_t * v) { long temp, result; __asm__ __volatile__( @@ -81,12 +76,12 @@ ".subsection 2\n" "2: br 1b\n" ".previous" - :"=&r" (temp), "=m" (__atomic_fool_gcc(v)), "=&r" (result) - :"Ir" (i), "m" (__atomic_fool_gcc(v))); + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); return result; } -extern __inline__ long atomic_sub_return(int i, atomic_t * v) +static __inline__ long atomic_sub_return(int i, atomic_t * v) { long temp, result; __asm__ __volatile__( @@ -99,8 +94,8 @@ ".subsection 2\n" "2: br 1b\n" ".previous" - :"=&r" (temp), "=m" (__atomic_fool_gcc(v)), "=&r" (result) - :"Ir" (i), "m" (__atomic_fool_gcc(v))); + :"=&r" (temp), "=m" (v->counter), "=&r" (result) + :"Ir" (i), "m" (v->counter) : "memory"); return result; } diff -urN 2.4.0-test9-pre5/include/asm-alpha/bitops.h z/include/asm-alpha/bitops.h --- 2.4.0-test9-pre5/include/asm-alpha/bitops.h Tue Sep 12 02:32:47 2000 +++ z/include/asm-alpha/bitops.h Fri Sep 22 17:47:44 2000 @@ -1,6 +1,8 @@ #ifndef _ALPHA_BITOPS_H #define _ALPHA_BITOPS_H +#include + /* * Copyright 1994, Linus Torvalds. */ @@ -17,14 +19,19 @@ * bit 0 is the LSB of addr; bit 64 is the LSB of (addr+1). */ +#define BITOPS_NO_BRANCH + extern __inline__ void set_bit(unsigned long nr, volatile void * addr) { +#ifndef BITOPS_NO_BRANCH unsigned long oldbit; +#endif unsigned long temp; unsigned int * m = ((unsigned int *) addr) + (nr >> 5); +#ifndef BITOPS_NO_BRANCH __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%4\n" " and %0,%3,%2\n" " bne %2,2f\n" " xor %0,%3,%0\n" @@ -36,16 +43,57 @@ ".previous" :"=&r" (temp), "=m" (*m), "=&r" (oldbit) :"Ir" (1UL << (nr & 31)), "m" (*m)); +#else + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " bis %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +#endif } +/* + * WARNING: non atomic version. + */ +extern __inline__ void __set_bit(unsigned long nr, volatile void * addr) +{ + unsigned int * m = ((unsigned int *) addr) + (nr >> 5); + /* + * Asm and C produces the same thing so let + * the compiler to do its good work. 
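
A usage sketch of why the value-returning atomics get a "memory" clobber while plain atomic_add()/atomic_sub() do not, presumably because the returning variants are what reference-count and release paths branch on. The structure and helper below are hypothetical; only atomic_t and atomic_sub_return() come from the alpha header above.

struct sketch_ref {
        atomic_t count;
        void (*release)(struct sketch_ref *);
};

static inline void sketch_put(struct sketch_ref *r)
{
        /* the "memory" clobber in atomic_sub_return() keeps gcc from
         * deferring earlier stores to *r, or reusing stale register copies
         * of its fields, across the decrement that may free the object */
        if (atomic_sub_return(1, &r->count) == 0)
                r->release(r);
}
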
+ */ +#if 0 + int tmp; + + __asm__ __volatile__( + "ldl %0,%3\n\t" + "bis %0,%2,%0\n\t" + "stl %0,%1" + : "=&r" (tmp), "=m" (*m) + : "Ir" (1UL << (nr & 31)), "m" (*m)); +#else + *m |= 1UL << (nr & 31); +#endif +} + +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() extern __inline__ void clear_bit(unsigned long nr, volatile void * addr) { +#ifndef BITOPS_NO_BRANCH unsigned long oldbit; +#endif unsigned long temp; unsigned int * m = ((unsigned int *) addr) + (nr >> 5); +#ifndef BITOPS_NO_BRANCH __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%4\n" " and %0,%3,%2\n" " beq %2,2f\n" " xor %0,%3,%0\n" @@ -57,6 +105,18 @@ ".previous" :"=&r" (temp), "=m" (*m), "=&r" (oldbit) :"Ir" (1UL << (nr & 31)), "m" (*m)); +#else + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " and %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (~(1UL << (nr & 31))), "m" (*m)); +#endif } extern __inline__ void change_bit(unsigned long nr, volatile void * addr) @@ -65,12 +125,12 @@ unsigned int * m = ((unsigned int *) addr) + (nr >> 5); __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%3\n" " xor %0,%2,%0\n" " stl_c %0,%1\n" - " beq %0,3f\n" + " beq %0,2f\n" ".subsection 2\n" - "3: br 1b\n" + "2: br 1b\n" ".previous" :"=&r" (temp), "=m" (*m) :"Ir" (1UL << (nr & 31)), "m" (*m)); @@ -84,18 +144,43 @@ unsigned int * m = ((unsigned int *) addr) + (nr >> 5); __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%4\n" " and %0,%3,%2\n" " bne %2,2f\n" " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" +#ifdef CONFIG_SMP " mb\n" +#endif "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +extern __inline__ int __test_and_set_bit(unsigned long nr, + volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + unsigned int * m = ((unsigned int *) addr) + (nr >> 5); + + __asm__ __volatile__( + " ldl %0,%4\n" + " and %0,%3,%2\n" + " bne %2,1f\n" + " xor %0,%3,%0\n" + " stl %0,%1\n" + "1:\n" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) :"Ir" (1UL << (nr & 31)), "m" (*m)); return oldbit != 0; @@ -109,18 +194,43 @@ unsigned int * m = ((unsigned int *) addr) + (nr >> 5); __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%4\n" " and %0,%3,%2\n" " beq %2,2f\n" " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" +#ifdef CONFIG_SMP " mb\n" +#endif "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. 
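
test_and_set_bit() above now pairs an SMP-only mb with a "memory" clobber. A short usage sketch of the contract this gives a caller that uses a bit as a lock (all names are hypothetical; on a UP kernel the mb instruction disappears, which is fine because a CPU always observes its own program order, including from interrupt context):

static unsigned long sketch_flags;              /* bit 0 acts as a lock bit */
static int sketch_data;

static int sketch_try_update(int value)
{
        if (test_and_set_bit(0, &sketch_flags))
                return 0;                       /* busy, someone else owns it */
        /* the mb after a successful stl_c, plus the "memory" clobber, keep
         * this store from being performed before the bit is visibly set */
        sketch_data = value;
        smp_mb__before_clear_bit();             /* order the store vs. unlock */
        clear_bit(0, &sketch_flags);
        return 1;
}
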
+ */ +extern __inline__ int __test_and_clear_bit(unsigned long nr, + volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + unsigned int * m = ((unsigned int *) addr) + (nr >> 5); + + __asm__ __volatile__( + " ldl %0,%4\n" + " and %0,%3,%2\n" + " beq %2,1f\n" + " xor %0,%3,%0\n" + " stl %0,%1\n" + "1:\n" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) :"Ir" (1UL << (nr & 31)), "m" (*m)); return oldbit != 0; @@ -134,17 +244,19 @@ unsigned int * m = ((unsigned int *) addr) + (nr >> 5); __asm__ __volatile__( - "1: ldl_l %0,%1\n" + "1: ldl_l %0,%4\n" " and %0,%3,%2\n" " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" +#ifdef CONFIG_SMP " mb\n" +#endif ".subsection 2\n" "3: br 1b\n" ".previous" :"=&r" (temp), "=m" (*m), "=&r" (oldbit) - :"Ir" (1UL << (nr & 31)), "m" (*m)); + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); return oldbit != 0; } @@ -279,16 +391,16 @@ #ifdef __KERNEL__ -#define ext2_set_bit test_and_set_bit -#define ext2_clear_bit test_and_clear_bit +#define ext2_set_bit __test_and_set_bit +#define ext2_clear_bit __test_and_clear_bit #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit /* Bitmap functions for the minix filesystem. */ -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr) +#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) +#define minix_set_bit(nr,addr) __set_bit(nr,addr) +#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) diff -urN 2.4.0-test9-pre5/include/asm-alpha/elf.h z/include/asm-alpha/elf.h --- 2.4.0-test9-pre5/include/asm-alpha/elf.h Thu Jul 20 21:31:11 2000 +++ z/include/asm-alpha/elf.h Fri Sep 22 17:47:44 2000 @@ -127,7 +127,7 @@ #ifdef __KERNEL__ #define SET_PERSONALITY(EX, IBCS2) \ - set_personality((EX).e_flags & EF_ALPHA_32BIT \ + set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ ? PER_LINUX_32BIT : (IBCS2) ? PER_SVR4 : PER_LINUX) #endif diff -urN 2.4.0-test9-pre5/include/asm-alpha/semaphore-helper.h z/include/asm-alpha/semaphore-helper.h --- 2.4.0-test9-pre5/include/asm-alpha/semaphore-helper.h Sun Feb 27 06:19:44 2000 +++ z/include/asm-alpha/semaphore-helper.h Fri Sep 22 17:47:44 2000 @@ -37,7 +37,7 @@ ".subsection 2\n" "3: br 1b\n" ".previous" - : "=r"(ret), "=r"(tmp), "=m"(__atomic_fool_gcc(&sem->waking)) + : "=r"(ret), "=r"(tmp), "=m"(sem->waking.counter) : "0"(0)); return ret > 0; diff -urN 2.4.0-test9-pre5/include/asm-alpha/spinlock.h z/include/asm-alpha/spinlock.h --- 2.4.0-test9-pre5/include/asm-alpha/spinlock.h Tue Aug 15 22:44:25 2000 +++ z/include/asm-alpha/spinlock.h Fri Sep 22 17:47:44 2000 @@ -5,8 +5,8 @@ #include #include -#define DEBUG_SPINLOCK 1 -#define DEBUG_RWLOCK 1 +#define DEBUG_SPINLOCK 0 +#define DEBUG_RWLOCK 0 /* * Simple spin lock operations. 
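
Remapping the ext2 and minix bitmap macros to the non-atomic __test_and_set_bit()/__test_and_clear_bit() rests on the callers, presumably because those bitmaps are only ever modified while a higher-level lock (the superblock lock in ext2's case) is already held, so the ll/sc retry loop buys nothing. A hypothetical sketch of that calling pattern:

static int sketch_alloc_bit(unsigned long *map, int size, spinlock_t *guard)
{
        int bit;

        spin_lock(guard);
        bit = find_first_zero_bit(map, size);
        if (bit < size)
                __test_and_set_bit(bit, map);   /* plain RMW is enough: the
                                                 * guard lock already excludes
                                                 * concurrent updaters */
        spin_unlock(guard);
        return bit < size ? bit : -1;
}
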
There are two variants, one clears IRQ's @@ -38,9 +38,6 @@ #define spin_is_locked(x) ((x)->lock != 0) #define spin_unlock_wait(x) ({ do { barrier(); } while ((x)->lock); }) -typedef struct { unsigned long a[100]; } __dummy_lock_t; -#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock)) - #if DEBUG_SPINLOCK extern void spin_unlock(spinlock_t * lock); extern void debug_spin_lock(spinlock_t * lock, const char *, int); @@ -83,8 +80,8 @@ " blbs %0,2b\n" " br 1b\n" ".previous" - : "=r" (tmp), "=m" (__dummy_lock(lock)) - : "m"(__dummy_lock(lock))); + : "=r" (tmp), "=m" (lock->lock) + : "m"(lock->lock) : "memory"); } #define spin_trylock(lock) (!test_and_set_bit(0,(lock))) @@ -119,9 +116,8 @@ " bne %1,6b\n" " br 1b\n" ".previous" - : "=m" (__dummy_lock(lock)), "=&r" (regx) - : "0" (__dummy_lock(lock)) - ); + : "=m" (*(volatile int *)lock), "=&r" (regx) + : "0" (*(volatile int *)lock) : "memory"); } static inline void read_lock(rwlock_t * lock) @@ -140,9 +136,8 @@ " blbs %1,6b\n" " br 1b\n" ".previous" - : "=m" (__dummy_lock(lock)), "=&r" (regx) - : "m" (__dummy_lock(lock)) - ); + : "=m" (*(volatile int *)lock), "=&r" (regx) + : "m" (*(volatile int *)lock) : "memory"); } #endif /* DEBUG_RWLOCK */ @@ -156,6 +151,7 @@ { long regx; __asm__ __volatile__( + " mb\n" "1: ldl_l %1,%0\n" " addl %1,2,%1\n" " stl_c %1,%0\n" @@ -163,8 +159,8 @@ ".subsection 2\n" "6: br 1b\n" ".previous" - : "=m" (__dummy_lock(lock)), "=&r" (regx) - : "m" (__dummy_lock(lock))); + : "=m" (*(volatile int *)lock), "=&r" (regx) + : "m" (*(volatile int *)lock) : "memory"); } #endif /* _ALPHA_SPINLOCK_H */ diff -urN 2.4.0-test9-pre5/include/asm-alpha/system.h z/include/asm-alpha/system.h --- 2.4.0-test9-pre5/include/asm-alpha/system.h Thu Aug 10 18:14:18 2000 +++ z/include/asm-alpha/system.h Fri Sep 22 17:47:44 2000 @@ -137,12 +137,19 @@ #define wmb() \ __asm__ __volatile__("wmb": : :"memory") +#ifdef __SMP__ +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + #define set_mb(var, value) \ do { var = value; mb(); } while (0) -#define set_rmb(var, value) \ -do { var = value; rmb(); } while (0) - #define set_wmb(var, value) \ do { var = value; wmb(); } while (0) @@ -284,11 +291,11 @@ #define getipl() (rdps() & 7) #define setipl(ipl) ((void) swpipl(ipl)) -#define __cli() setipl(IPL_MAX) -#define __sti() setipl(IPL_MIN) +#define __cli() do { setipl(IPL_MAX); barrier(); } while(0) +#define __sti() do { barrier(); setipl(IPL_MIN); } while(0) #define __save_flags(flags) ((flags) = rdps()) -#define __save_and_cli(flags) ((flags) = swpipl(IPL_MAX)) -#define __restore_flags(flags) setipl(flags) +#define __save_and_cli(flags) do { (flags) = swpipl(IPL_MAX); barrier(); } while(0) +#define __restore_flags(flags) do { barrier(); setipl(flags); barrier(); } while(0) #define local_irq_save(flags) __save_and_cli(flags) #define local_irq_restore(flags) __restore_flags(flags) @@ -344,6 +351,8 @@ /* * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). 
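
The barrier() calls added to __cli()/__sti()/__save_and_cli()/__restore_flags() below are compiler barriers only; they matter because swpipl() is an inline PALcode call that does not itself clobber memory, so gcc was previously free to move ordinary loads and stores across it. A sketch of code that relies on the new behaviour (the variable is hypothetical):

extern int sketch_pending;      /* written by an interrupt handler */

static void sketch_poll(void)
{
        __cli();                /* barrier() stops gcc from hoisting the load
                                 * of sketch_pending above the IPL raise */
        if (sketch_pending) {
                sketch_pending = 0;
                /* ... handle the event ... */
        }
        __sti();                /* and from sinking the store below this */
}
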
*/ extern __inline__ unsigned long @@ -352,16 +361,18 @@ unsigned long dummy; __asm__ __volatile__( - "1: ldl_l %0,%2\n" + "1: ldl_l %0,%4\n" " bis $31,%3,%1\n" " stl_c %1,%2\n" " beq %1,2f\n" +#ifdef CONFIG_SMP " mb\n" +#endif ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m)); + : "rI" (val), "m" (*m) : "memory"); return val; } @@ -372,16 +383,18 @@ unsigned long dummy; __asm__ __volatile__( - "1: ldq_l %0,%2\n" + "1: ldq_l %0,%4\n" " bis $31,%3,%1\n" " stq_c %1,%2\n" " beq %1,2f\n" +#ifdef CONFIG_SMP " mb\n" +#endif ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m)); + : "rI" (val), "m" (*m) : "memory"); return val; } @@ -416,6 +429,11 @@ * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. + * + * The memory barrier should be placed in SMP only when we actually + * make the change. If we don't change anything (so if the returned + * prev is equal to old) then we aren't acquiring anything new and + * we don't need any memory barrier as far I can tell. */ #define __HAVE_ARCH_CMPXCHG 1 @@ -426,18 +444,21 @@ unsigned long prev, cmp; __asm__ __volatile__( - "1: ldl_l %0,%2\n" + "1: ldl_l %0,%5\n" " cmpeq %0,%3,%1\n" " beq %1,2f\n" " mov %4,%1\n" " stl_c %1,%2\n" " beq %1,3f\n" - "2: mb\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m)); + : "r"((long) old), "r"(new), "m"(*m) : "memory"); return prev; } @@ -448,18 +469,21 @@ unsigned long prev, cmp; __asm__ __volatile__( - "1: ldq_l %0,%2\n" + "1: ldq_l %0,%5\n" " cmpeq %0,%3,%1\n" " beq %1,2f\n" " mov %4,%1\n" " stq_c %1,%2\n" " beq %1,3f\n" - "2: mb\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + "2:\n" ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m)); + : "r"((long) old), "r"(new), "m"(*m) : "memory"); return prev; } diff -urN 2.4.0-test9-pre5/include/asm-i386/atomic.h z/include/asm-i386/atomic.h --- 2.4.0-test9-pre5/include/asm-i386/atomic.h Mon Sep 4 14:35:13 2000 +++ z/include/asm-i386/atomic.h Fri Sep 22 17:47:44 2000 @@ -19,102 +19,96 @@ * on us. We need to use _exactly_ the address the user gave us, * not some alias that contains the same information. 
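
The comment added before __cmpxchg_u32/u64 above is worth spelling out with a caller: the mb is now issued only on the path where stl_c/stq_c actually stored, so a compare that fails pays nothing. A hypothetical lock-free helper built on the cmpxchg() macro from that header:

static inline int sketch_dec_if_positive(unsigned long *p)
{
        unsigned long old, new;

        do {
                old = *p;
                if (old == 0)
                        return 0;               /* nothing stored, and the
                                                 * failed-compare path issues
                                                 * no mb either */
                new = old - 1;
        } while (cmpxchg(p, old, new) != old);  /* mb only after the winning
                                                 * stl_c, and only on SMP */
        return 1;
}
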
*/ -#define __atomic_fool_gcc(x) (*(volatile struct { int a[100]; } *)x) - -#ifdef CONFIG_SMP typedef struct { volatile int counter; } atomic_t; -#else -typedef struct { int counter; } atomic_t; -#endif #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) -static __inline__ void atomic_add(int i, volatile atomic_t *v) +static __inline__ void atomic_add(int i, atomic_t *v) { __asm__ __volatile__( LOCK "addl %1,%0" - :"=m" (__atomic_fool_gcc(v)) - :"ir" (i), "m" (__atomic_fool_gcc(v))); + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); } -static __inline__ void atomic_sub(int i, volatile atomic_t *v) +static __inline__ void atomic_sub(int i, atomic_t *v) { __asm__ __volatile__( LOCK "subl %1,%0" - :"=m" (__atomic_fool_gcc(v)) - :"ir" (i), "m" (__atomic_fool_gcc(v))); + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); } -static __inline__ int atomic_sub_and_test(int i, volatile atomic_t *v) +static __inline__ int atomic_sub_and_test(int i, atomic_t *v) { unsigned char c; __asm__ __volatile__( LOCK "subl %2,%0; sete %1" - :"=m" (__atomic_fool_gcc(v)), "=qm" (c) - :"ir" (i), "m" (__atomic_fool_gcc(v))); + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); return c; } -static __inline__ void atomic_inc(volatile atomic_t *v) +static __inline__ void atomic_inc(atomic_t *v) { __asm__ __volatile__( LOCK "incl %0" - :"=m" (__atomic_fool_gcc(v)) - :"m" (__atomic_fool_gcc(v))); + :"=m" (v->counter) + :"m" (v->counter)); } -static __inline__ void atomic_dec(volatile atomic_t *v) +static __inline__ void atomic_dec(atomic_t *v) { __asm__ __volatile__( LOCK "decl %0" - :"=m" (__atomic_fool_gcc(v)) - :"m" (__atomic_fool_gcc(v))); + :"=m" (v->counter) + :"m" (v->counter)); } -static __inline__ int atomic_dec_and_test(volatile atomic_t *v) +static __inline__ int atomic_dec_and_test(atomic_t *v) { unsigned char c; __asm__ __volatile__( LOCK "decl %0; sete %1" - :"=m" (__atomic_fool_gcc(v)), "=qm" (c) - :"m" (__atomic_fool_gcc(v))); + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); return c != 0; } -static __inline__ int atomic_inc_and_test(volatile atomic_t *v) +static __inline__ int atomic_inc_and_test(atomic_t *v) { unsigned char c; __asm__ __volatile__( LOCK "incl %0; sete %1" - :"=m" (__atomic_fool_gcc(v)), "=qm" (c) - :"m" (__atomic_fool_gcc(v))); + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); return c != 0; } -extern __inline__ int atomic_add_negative(int i, volatile atomic_t *v) +static __inline__ int atomic_add_negative(int i, atomic_t *v) { unsigned char c; __asm__ __volatile__( LOCK "addl %2,%0; sets %1" - :"=m" (__atomic_fool_gcc(v)), "=qm" (c) - :"ir" (i), "m" (__atomic_fool_gcc(v))); + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); return c; } /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ __asm__ __volatile__(LOCK "andl %0,%1" \ -: : "r" (~(mask)),"m" (__atomic_fool_gcc(addr)) : "memory") +: : "r" (~(mask)),"m" (*addr) : "memory") #define atomic_set_mask(mask, addr) \ __asm__ __volatile__(LOCK "orl %0,%1" \ -: : "r" (mask),"m" (__atomic_fool_gcc(addr)) : "memory") +: : "r" (mask),"m" (*addr) : "memory") #endif diff -urN 2.4.0-test9-pre5/include/asm-i386/bitops.h z/include/asm-i386/bitops.h --- 2.4.0-test9-pre5/include/asm-i386/bitops.h Sat Aug 26 18:15:40 2000 +++ z/include/asm-i386/bitops.h Fri Sep 22 17:47:44 2000 @@ -21,29 +21,9 @@ #define LOCK_PREFIX "" #endif -/* - * Function prototypes to keep 
gcc -Wall happy - */ -extern void set_bit(int nr, volatile void * addr); -extern void clear_bit(int nr, volatile void * addr); -extern void change_bit(int nr, volatile void * addr); -extern int test_and_set_bit(int nr, volatile void * addr); -extern int test_and_clear_bit(int nr, volatile void * addr); -extern int test_and_change_bit(int nr, volatile void * addr); -extern int __constant_test_bit(int nr, const volatile void * addr); -extern int __test_bit(int nr, volatile void * addr); -extern int find_first_zero_bit(void * addr, unsigned size); -extern int find_next_zero_bit (void * addr, int size, int offset); -extern unsigned long ffz(unsigned long word); - -/* - * Some hacks to defeat gcc over-optimizations.. - */ -struct __dummy { unsigned long a[100]; }; -#define ADDR (*(volatile struct __dummy *) addr) -#define CONST_ADDR (*(volatile const struct __dummy *) addr) +#define ADDR (*(volatile long *) addr) -extern __inline__ void set_bit(int nr, volatile void * addr) +static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" @@ -51,7 +31,21 @@ :"Ir" (nr)); } -extern __inline__ void clear_bit(int nr, volatile void * addr) +/* WARNING: non atomic and it can be reordered! */ +static __inline__ void __set_bit(int nr, volatile void * addr) +{ + __asm__( + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/* + * clear_bit() doesn't provide any barrier for the compiler. + */ +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() +static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" @@ -59,7 +53,7 @@ :"Ir" (nr)); } -extern __inline__ void change_bit(int nr, volatile void * addr) +static __inline__ void change_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btcl %1,%0" @@ -67,48 +61,77 @@ :"Ir" (nr)); } -extern __inline__ int test_and_set_bit(int nr, volatile void * addr) +/* + * It will also imply a memory barrier, thus it must clobber memory + * to make sure to reload anything that was cached into registers + * outside _this_ critical section. + */ +static __inline__ int test_and_set_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btsl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static __inline__ int __test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr)); return oldbit; } -extern __inline__ int test_and_clear_bit(int nr, volatile void * addr) +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static __inline__ int __test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr)); return oldbit; } -extern __inline__ int test_and_change_bit(int nr, volatile void * addr) +static __inline__ int test_and_change_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btcl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr)); + :"Ir" (nr) : "memory"); return oldbit; } /* * This routine doesn't need to be atomic. 
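
On i386 the LOCK prefix already gives the CPU-level ordering, so the "memory" clobbers added to test_and_set_bit()/test_and_clear_bit()/test_and_change_bit() below are purely about the compiler: without them gcc may keep shared data cached in registers across what the caller intends as a lock operation. A sketch of the pattern that depends on this (all names hypothetical):

extern unsigned long sketch_bitlock;    /* bit 0 used as a spinlock */
extern int sketch_shared;

static int sketch_inc_locked(void)
{
        int val;

        while (test_and_set_bit(0, &sketch_bitlock))
                /* spin */;
        val = ++sketch_shared;          /* loaded and stored inside the lock,
                                         * not hoisted or sunk by gcc, thanks
                                         * to the "memory" clobbers */
        smp_mb__before_clear_bit();     /* compiler barrier before dropping;
                                         * the locked btrl orders the CPU */
        clear_bit(0, &sketch_bitlock);
        return val;
}
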
*/ -extern __inline__ int __constant_test_bit(int nr, const volatile void * addr) +static __inline__ int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } -extern __inline__ int __test_bit(int nr, volatile void * addr) +static __inline__ int variable_test_bit(int nr, volatile void * addr) { int oldbit; @@ -121,13 +144,13 @@ #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? \ - __constant_test_bit((nr),(addr)) : \ - __test_bit((nr),(addr))) + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) /* * Find-bit routines.. */ -extern __inline__ int find_first_zero_bit(void * addr, unsigned size) +static __inline__ int find_first_zero_bit(void * addr, unsigned size) { int d0, d1, d2; int res; @@ -151,7 +174,7 @@ return res; } -extern __inline__ int find_next_zero_bit (void * addr, int size, int offset) +static __inline__ int find_next_zero_bit (void * addr, int size, int offset) { unsigned long * p = ((unsigned long *) addr) + (offset >> 5); int set = 0, bit = offset & 31, res; @@ -182,7 +205,7 @@ * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. */ -extern __inline__ unsigned long ffz(unsigned long word) +static __inline__ unsigned long ffz(unsigned long word) { __asm__("bsfl %1,%0" :"=r" (word) @@ -198,7 +221,7 @@ * differs in spirit from the above ffz (man ffs). */ -extern __inline__ int ffs(int x) +static __inline__ int ffs(int x) { int r; @@ -222,16 +245,16 @@ #ifdef __KERNEL__ -#define ext2_set_bit test_and_set_bit -#define ext2_clear_bit test_and_clear_bit +#define ext2_set_bit __test_and_set_bit +#define ext2_clear_bit __test_and_clear_bit #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit /* Bitmap functions for the minix filesystem. 
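
The wholesale extern __inline__ to static __inline__ conversion in this header changes more than style: with gcc's traditional extern inline the body exists only as an inlining hint, so any call gcc declines to inline, or any address-of, references an external symbol that is never defined and the link fails; static inline emits a file-local copy in those cases. A small illustration, not taken from the kernel:

static __inline__ int sketch_ffs(int x)         /* 1-based, 0 when x == 0 */
{
        int r = 1;

        if (!x)
                return 0;
        while (!(x & 1)) {
                x >>= 1;
                r++;
        }
        return r;
}

/* fine with static inline: gcc materialises a local copy to take the
 * address of; with extern inline this would need a separate out-of-line
 * definition somewhere, or the link would fail */
int (*sketch_ffs_ptr)(int) = sketch_ffs;
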
*/ -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr) +#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) +#define minix_set_bit(nr,addr) __set_bit(nr,addr) +#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) diff -urN 2.4.0-test9-pre5/include/asm-i386/rwlock.h z/include/asm-i386/rwlock.h --- 2.4.0-test9-pre5/include/asm-i386/rwlock.h Wed Dec 8 00:05:27 1999 +++ z/include/asm-i386/rwlock.h Fri Sep 22 17:47:44 2000 @@ -17,9 +17,6 @@ #ifndef _ASM_I386_RWLOCK_H #define _ASM_I386_RWLOCK_H -typedef struct { unsigned long a[100]; } __dummy_lock_t; -#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock)) - #define RW_LOCK_BIAS 0x01000000 #define RW_LOCK_BIAS_STR "0x01000000" @@ -44,7 +41,7 @@ "popl %%eax\n\t" \ "jmp 1b\n" \ ".previous" \ - :"=m" (__dummy_lock(rw))) + :"=m" (*(volatile int *)rw) : : "memory") #define __build_read_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ @@ -74,7 +71,7 @@ "popl %%eax\n\t" \ "jmp 1b\n" \ ".previous" \ - :"=m" (__dummy_lock(rw))) + :"=m" (*(volatile int *)rw) : : "memory") #define __build_write_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ diff -urN 2.4.0-test9-pre5/include/asm-i386/spinlock.h z/include/asm-i386/spinlock.h --- 2.4.0-test9-pre5/include/asm-i386/spinlock.h Tue Sep 12 02:32:47 2000 +++ z/include/asm-i386/spinlock.h Fri Sep 22 17:50:09 2000 @@ -70,13 +70,12 @@ char oldval; __asm__ __volatile__( "xchgb %b0,%1" - :"=q" (oldval), "=m" (__dummy_lock(lock)) - :"0" (0) - :"memory"); + :"=q" (oldval), "=m" (lock->lock) + :"0" (0) : "memory"); return oldval > 0; } -extern inline void spin_lock(spinlock_t *lock) +static inline void spin_lock(spinlock_t *lock) { #if SPINLOCK_DEBUG __label__ here; @@ -88,11 +87,10 @@ #endif __asm__ __volatile__( spin_lock_string - :"=m" (__dummy_lock(lock)) - : :"memory"); + :"=m" (lock->lock) : : "memory"); } -extern inline void spin_unlock(spinlock_t *lock) +static inline void spin_unlock(spinlock_t *lock) { #if SPINLOCK_DEBUG if (lock->magic != SPINLOCK_MAGIC) @@ -102,8 +100,7 @@ #endif __asm__ __volatile__( spin_unlock_string - :"=m" (__dummy_lock(lock)) - : :"memory"); + :"=m" (lock->lock) : : "memory"); } /* @@ -146,7 +143,7 @@ */ /* the spinlock helpers are in arch/i386/kernel/semaphore.S */ -extern inline void read_lock(rwlock_t *rw) +static inline void read_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -155,7 +152,7 @@ __build_read_lock(rw, "__read_lock_failed"); } -extern inline void write_lock(rwlock_t *rw) +static inline void write_lock(rwlock_t *rw) { #if SPINLOCK_DEBUG if (rw->magic != RWLOCK_MAGIC) @@ -164,10 +161,10 @@ __build_write_lock(rw, "__write_lock_failed"); } -#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" (__dummy_lock(&(rw)->lock))) -#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" (__dummy_lock(&(rw)->lock))) +#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") -extern inline int write_trylock(rwlock_t *lock) +static inline int write_trylock(rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff -urN 
2.4.0-test9-pre5/include/asm-i386/system.h z/include/asm-i386/system.h --- 2.4.0-test9-pre5/include/asm-i386/system.h Tue Sep 12 02:32:47 2000 +++ z/include/asm-i386/system.h Fri Sep 22 17:47:44 2000 @@ -278,11 +278,22 @@ #endif #define rmb() mb() #define wmb() __asm__ __volatile__ ("": : :"memory") + +#ifdef __SMP__ +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + #define set_mb(var, value) do { xchg(&var, value); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0) /* interrupt control.. */ -#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */ :"memory") +#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */) #define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory") #define __cli() __asm__ __volatile__("cli": : :"memory") #define __sti() __asm__ __volatile__("sti": : :"memory") @@ -291,9 +302,9 @@ /* For spinlocks etc */ #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") -#define local_irq_restore(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory") -#define local_irq_disable() __asm__ __volatile__("cli": : :"memory") -#define local_irq_enable() __asm__ __volatile__("sti": : :"memory") +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() #ifdef CONFIG_SMP diff -urN 2.4.0-test9-pre5/include/asm-sparc64/system.h z/include/asm-sparc64/system.h --- 2.4.0-test9-pre5/include/asm-sparc64/system.h Thu Aug 17 19:57:41 2000 +++ z/include/asm-sparc64/system.h Fri Sep 22 17:47:44 2000 @@ -100,8 +100,8 @@ #define nop() __asm__ __volatile__ ("nop") #define membar(type) __asm__ __volatile__ ("membar " type : : : "memory"); -#define rmb() membar("#LoadLoad | #LoadStore") -#define wmb() membar("#StoreLoad | #StoreStore") +#define rmb() membar("#LoadLoad") +#define wmb() membar("#StoreStore") #define set_mb(__var, __value) \ do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0) #define set_wmb(__var, __value) \ diff -urN 2.4.0-test9-pre5/include/linux/brlock.h z/include/linux/brlock.h --- 2.4.0-test9-pre5/include/linux/brlock.h Sat Aug 26 18:15:47 2000 +++ z/include/linux/brlock.h Fri Sep 22 17:47:44 2000 @@ -114,10 +114,23 @@ lock = &__br_write_locks[idx].lock; again: (*ctr)++; - rmb(); + mb(); if (spin_is_locked(lock)) { (*ctr)--; - rmb(); + wmb(); /* + * The release of the ctr must become visible + * to the other cpus eventually thus wmb(), + * we don't care if spin_is_locked is reordered + * before the releasing of the ctr. + * However IMHO this wmb() is superflous even in theory. + * It would not be superflous only if on the + * other CPUs doing a ldl_l instead of an ldl + * would make a difference and I don't think this is + * the case. + * I'd like to clarify this issue further + * but for now this is a slow path so adding the + * wmb() will keep us on the safe side. 
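
The rmb() to mb() change in the br_read_lock() fast path above is the classic store-versus-load case: the reader publishes its counter with a store and then inspects the writer's lock with a load; rmb() only orders loads, so the CPU may perform that load before the counter store is visible and both sides can enter together. A simplified sketch of the protocol, with one global counter instead of the per-CPU array and hypothetical names:

extern volatile int sketch_reader_ctr;
extern spinlock_t sketch_wrlock;

static void sketch_read_lock(void)
{
again:
        sketch_reader_ctr++;            /* "a reader is here" */
        mb();                           /* counter store vs. lock load */
        if (spin_is_locked(&sketch_wrlock)) {
                sketch_reader_ctr--;    /* back off so the writer can run */
                while (spin_is_locked(&sketch_wrlock))
                        barrier();
                goto again;
        }
}

static void sketch_write_lock(void)
{
        spin_lock(&sketch_wrlock);      /* spin_lock() provides the barrier
                                         * on this side before the counter
                                         * is examined */
        while (sketch_reader_ctr != 0)
                barrier();
}
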
+ */ while (spin_is_locked(lock)) barrier(); goto again; diff -urN 2.4.0-test9-pre5/include/linux/locks.h z/include/linux/locks.h --- 2.4.0-test9-pre5/include/linux/locks.h Sun Aug 27 16:21:04 2000 +++ z/include/linux/locks.h Fri Sep 22 17:47:44 2000 @@ -29,7 +29,9 @@ extern inline void unlock_buffer(struct buffer_head *bh) { clear_bit(BH_Lock, &bh->b_state); - wake_up(&bh->b_wait); + smp_mb__after_clear_bit(); + if (waitqueue_active(&bh->b_wait)) + wake_up(&bh->b_wait); } /* @@ -55,7 +57,12 @@ extern inline void unlock_super(struct super_block * sb) { sb->s_lock = 0; - wake_up(&sb->s_wait); + /* + * No need of any barrier, we're protected by + * the big kernel lock here... unfortunately :) + */ + if (waitqueue_active(&sb->s_wait)) + wake_up(&sb->s_wait); } #endif /* _LINUX_LOCKS_H */ diff -urN 2.4.0-test9-pre5/include/linux/mm.h z/include/linux/mm.h --- 2.4.0-test9-pre5/include/linux/mm.h Thu Sep 21 17:44:41 2000 +++ z/include/linux/mm.h Fri Sep 22 17:47:44 2000 @@ -193,9 +193,18 @@ #define PageLocked(page) test_bit(PG_locked, &(page)->flags) #define LockPage(page) set_bit(PG_locked, &(page)->flags) #define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) +/* + * The first mb is necessary to safely close the critical section opened by the + * TryLockPage(), the second mb is necessary to enforce ordering between + * the clear_bit and the read of the waitqueue (to avoid SMP races with a + * parallel wait_on_page). + */ #define UnlockPage(page) do { \ + smp_mb__before_clear_bit(); \ clear_bit(PG_locked, &(page)->flags); \ - wake_up(&page->wait); \ + smp_mb__after_clear_bit(); \ + if (waitqueue_active(&page->wait)) \ + wake_up(&page->wait); \ } while (0) #define PageError(page) test_bit(PG_error, &(page)->flags) #define SetPageError(page) set_bit(PG_error, &(page)->flags) diff -urN 2.4.0-test9-pre5/include/linux/tqueue.h z/include/linux/tqueue.h --- 2.4.0-test9-pre5/include/linux/tqueue.h Sat Aug 26 18:15:42 2000 +++ z/include/linux/tqueue.h Fri Sep 22 17:47:44 2000 @@ -114,7 +114,7 @@ f = p -> routine; save_p = p; p = p -> next; - mb(); + smp_mb(); save_p -> sync = 0; if (f) (*f)(arg); diff -urN 2.4.0-test9-pre5/kernel/softirq.c z/kernel/softirq.c --- 2.4.0-test9-pre5/kernel/softirq.c Thu Aug 17 19:57:44 2000 +++ z/kernel/softirq.c Fri Sep 22 17:47:44 2000 @@ -44,7 +44,7 @@ irq_cpustat_t irq_stat[NR_CPUS]; #endif /* CONFIG_ARCH_S390 */ -static struct softirq_action softirq_vec[32]; +static struct softirq_action softirq_vec[32] __cacheline_aligned; asmlinkage void do_softirq() { @@ -140,6 +140,14 @@ clear_bit(TASKLET_STATE_SCHED, &t->state); t->func(t->data); + /* + * talklet_trylock() uses test_and_set_bit that imply + * an mb when it returns zero, thus we need the explicit + * mb only here: while closing the critical section. + */ +#ifdef CONFIG_SMP + smp_mb__before_clear_bit(); +#endif tasklet_unlock(t); continue; } diff -urN 2.4.0-test9-pre5/net/core/dev.c z/net/core/dev.c --- 2.4.0-test9-pre5/net/core/dev.c Thu Sep 21 17:44:41 2000 +++ z/net/core/dev.c Fri Sep 22 17:47:44 2000 @@ -1141,6 +1141,7 @@ struct net_device *dev = head; head = head->next_sched; + smp_mb__before_clear_bit(); clear_bit(__LINK_STATE_SCHED, &dev->state); if (spin_trylock(&dev->queue_lock)) {
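
The locks.h and mm.h changes above close the same race from two directions, so it is worth writing out both halves together. The unlocking side must make the bit clear visible before it samples the waitqueue (the two smp_mb__*_clear_bit() calls); the waitqueue_active() test is then only an optimisation to skip wake_up() when nobody can be sleeping. The sleeping side is safe because it queues itself and sets its task state before re-testing the bit; set_current_state() (a set_mb() in 2.4) supplies the barrier there. A schematic pair of helpers, using the 2.4 page fields but otherwise hypothetical:

static void sketch_unlock_page(struct page *page)
{
        smp_mb__before_clear_bit();             /* close the critical section */
        clear_bit(PG_locked, &page->flags);
        smp_mb__after_clear_bit();              /* bit clear before the
                                                 * waitqueue_active() load */
        if (waitqueue_active(&page->wait))
                wake_up(&page->wait);
}

static void sketch_wait_on_page(struct page *page)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue(&page->wait, &wait);     /* be visible to the unlocker
                                                 * before testing the bit */
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!PageLocked(page))
                        break;
                schedule();                     /* the unlocker saw us queued */
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&page->wait, &wait);
}
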