From: Manfred Spraul and Brian Gerst

The patch performs the kmalloc cache lookup for constant kmalloc calls at
compile time.  The idea is that the loop in kmalloc takes a significant
amount of time, and for kmalloc(4096, GFP_KERNEL), that lookup can happen
entirely at compile time.

A problem has been seen with gcc-3.2.2-5 from RedHat.  This code:

	if (__builtin_constant_p(size)) {
		if (size < 32) return kmem_cache_alloc(...);
		if (size < 64) return kmem_cache_alloc(...);
		if (size < 96) return kmem_cache_alloc(...);
		if (size < 128) return kmem_cache_alloc(...);
		...
	}

doesn't work: gcc constant-folds only the first two or three comparisons
and then emits real comparison code for the rest.  We did it that way
anyway; the problem is apparently fixed in later compilers.


 include/linux/slab.h |   33 ++++++++++++++++++++++++++++++++-
 kernel/ksyms.c       |    3 ++-
 mm/slab.c            |    8 ++------
 3 files changed, 36 insertions(+), 8 deletions(-)

diff -puN include/linux/slab.h~fixed-size-kmalloc-speedup include/linux/slab.h
--- 25/include/linux/slab.h~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-06-10 23:33:20.000000000 -0700
@@ -62,7 +62,38 @@ extern void *kmem_cache_alloc(kmem_cache
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 
-extern void *kmalloc(size_t, int);
+/* Size description struct for general caches. */
+struct cache_sizes {
+	size_t cs_size;
+	kmem_cache_t *cs_cachep;
+	kmem_cache_t *cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, int);
+
+static inline void *kmalloc(size_t size, int flags)
+{
+	if (__builtin_constant_p(size)) {
+		int i = 0;
+#define CACHE(x) \
+		if (size <= x) \
+			goto found; \
+		else \
+			i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		{
+			extern void __you_cannot_kmalloc_that_much(void);
+			__you_cannot_kmalloc_that_much();
+		}
+found:
+		return kmem_cache_alloc((flags & GFP_DMA) ?
+			malloc_sizes[i].cs_dmacachep :
+			malloc_sizes[i].cs_cachep, flags);
+	}
+	return __kmalloc(size, flags);
+}
+
 extern void kfree(const void *);
 extern unsigned int ksize(const void *);
 

diff -puN kernel/ksyms.c~fixed-size-kmalloc-speedup kernel/ksyms.c
--- 25/kernel/ksyms.c~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/kernel/ksyms.c	2003-06-10 23:33:20.000000000 -0700
@@ -95,7 +95,8 @@ EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
 EXPORT_SYMBOL(set_shrinker);
 EXPORT_SYMBOL(remove_shrinker);
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(malloc_sizes);
+EXPORT_SYMBOL(__kmalloc);
 EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__alloc_percpu);

diff -puN mm/slab.c~fixed-size-kmalloc-speedup mm/slab.c
--- 25/mm/slab.c~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-06-10 23:33:20.000000000 -0700
@@ -385,11 +385,7 @@ static int slab_break_gfp_order = BREAK_
 #define	GET_PAGE_SLAB(pg)    ((struct slab *)(pg)->list.prev)
 
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
-static struct cache_sizes {
-	size_t cs_size;
-	kmem_cache_t *cs_cachep;
-	kmem_cache_t *cs_dmacachep;
-} malloc_sizes[] = {
+struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	{ 0, }
@@ -1967,7 +1963,7 @@ void * kmem_cache_alloc (kmem_cache_t *c
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void * kmalloc (size_t size, int flags)
+void * __kmalloc (size_t size, int flags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
_
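
To see the compiler behavior the patch relies on, here is a minimal
user-space sketch; the names (cache_index, constant_index,
variable_index) and the hard-coded size table are illustrative, not
part of the kernel patch.  Built with gcc -O2 -S, constant_index()
should compile down to loading the constant 9 with no comparisons
left; a compiler with the gcc-3.2.2-5 problem described above would
instead emit real comparison code after the first few CACHE()
expansions.

	#include <stdio.h>
	#include <stddef.h>

	static inline int cache_index(size_t size)
	{
		int i = 0;
	/* Same shape as the patch's CACHE() chain in <linux/slab.h>. */
	#define CACHE(x) if (size <= (x)) goto found; else i++;
		CACHE(32) CACHE(64) CACHE(96) CACHE(128) CACHE(192)
		CACHE(256) CACHE(512) CACHE(1024) CACHE(2048) CACHE(4096)
	#undef CACHE
		return -1;	/* size too large for any general cache */
	found:
		return i;
	}

	/* Constant argument: the whole chain should fold to "return 9". */
	int constant_index(void)
	{
		return cache_index(4096);
	}

	/* Variable argument: the comparisons actually run (returns 3). */
	int variable_index(size_t size)
	{
		return cache_index(size);
	}

	int main(void)
	{
		printf("4096 -> index %d\n", constant_index());
		printf(" 100 -> index %d\n", variable_index(100));
		return 0;
	}

Note that the kernel version has no run-time fallback for an oversize
constant: instead of returning -1 it calls the deliberately undefined
__you_cannot_kmalloc_that_much(), so a constant size too large for
every cache fails at link time.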