Several places in ext2 and ext3 are using filesystem-wide counters which use global locking. Mainly for the orlov allocator's heuristics. To solve the contention which this causes we can trade off accuracy against speed. This patch introduces a "percpu_counter" library type in which the counts are per-cpu and are periodically spilled into a global counter. Readers only read the global counter. These objects are *large*. On a 32 CPU P4, they are 4 kbytes. On a 4 way p3, 128 bytes. 25-akpm/include/linux/percpu_counter.h | 100 +++++++++++++++++++++++++++++++++ 25-akpm/kernel/ksyms.c | 2 25-akpm/lib/Makefile | 1 25-akpm/lib/percpu_counter.c | 18 +++++ 4 files changed, 121 insertions(+) diff -puN /dev/null include/linux/percpu_counter.h --- /dev/null Thu Apr 11 07:25:15 2002 +++ 25-akpm/include/linux/percpu_counter.h Tue Apr 1 16:52:19 2003 @@ -0,0 +1,100 @@ +/* + * A simple "approximate counter" for use in ext2 and ext3 superblocks. + * + * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. + */ + +#include <linux/config.h> +#include <linux/spinlock.h> +#include <linux/smp.h> +#include <linux/preempt.h> + +#ifdef CONFIG_SMP + +struct __percpu_counter { + long count; +} ____cacheline_aligned; + +struct percpu_counter { + spinlock_t lock; + long count; + struct __percpu_counter counters[NR_CPUS]; +}; + +#if NR_CPUS >= 16 +#define FBC_BATCH (NR_CPUS*2) +#else +#define FBC_BATCH (NR_CPUS*4) +#endif + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + int i; + + spin_lock_init(&fbc->lock); + fbc->count = 0; + for (i = 0; i < NR_CPUS; i++) + fbc->counters[i].count = 0; +} + +void percpu_counter_mod(struct percpu_counter *fbc, long amount); + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; +} + +/* + * It is possible for the percpu_counter_read() to return a small negative + * number for some counter which should never be negative. 
+ */ +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + long ret = fbc->count; + + barrier(); /* Prevent reloads of fbc->count */ + if (ret > 0) + return ret; + return 1; +} + +#else + +struct percpu_counter { + long count; +}; + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + fbc->count = 0; +} + +static inline void +percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + preempt_disable(); + fbc->count += amount; + preempt_enable(); +} + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; +} + +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + return fbc->count; +} + +#endif /* CONFIG_SMP */ + +static inline void percpu_counter_inc(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, 1); +} + +static inline void percpu_counter_dec(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, -1); +} diff -puN kernel/ksyms.c~percpu_counter kernel/ksyms.c --- 25/kernel/ksyms.c~percpu_counter Tue Apr 1 16:52:19 2003 +++ 25-akpm/kernel/ksyms.c Tue Apr 1 16:52:19 2003 @@ -58,6 +58,7 @@ #include #include #include +#include <linux/percpu_counter.h> #include #if defined(CONFIG_PROC_FS) @@ -100,6 +101,7 @@ EXPORT_SYMBOL(kfree); #ifdef CONFIG_SMP EXPORT_SYMBOL(kmalloc_percpu); EXPORT_SYMBOL(kfree_percpu); +EXPORT_SYMBOL(percpu_counter_mod); #endif EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(__vmalloc); diff -puN lib/Makefile~percpu_counter lib/Makefile --- 25/lib/Makefile~percpu_counter Tue Apr 1 16:52:19 2003 +++ 25-akpm/lib/Makefile Tue Apr 1 16:52:19 2003 @@ -14,6 +14,7 @@ obj-y := errno.o ctype.o string.o vsprin obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +obj-$(CONFIG_SMP) += percpu_counter.o ifneq ($(CONFIG_HAVE_DEC_LOCK),y) obj-y += dec_and_lock.o diff -puN /dev/null lib/percpu_counter.c --- /dev/null Thu Apr 11 07:25:15 2002 +++ 25-akpm/lib/percpu_counter.c Tue Apr 1 16:52:19 2003 @@ -0,0 +1,18 @@ + 
+#include <linux/percpu_counter.h> + +void percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + int cpu = get_cpu(); + long count = fbc->counters[cpu].count; + + count += amount; + if (count >= FBC_BATCH || count <= -FBC_BATCH) { + spin_lock(&fbc->lock); + fbc->count += count; + spin_unlock(&fbc->lock); + count = 0; + } + fbc->counters[cpu].count = count; + put_cpu(); +} _