From: Manfred Spraul

I found a new idea for optimizing the loop in kmalloc - now the cleanup both
reduces the size of the .text segment in mm/slab.o and speeds up large
kmallocs a bit.  I still lose 2 cycles for kmalloc(32,GFP_KERNEL) compared to
the current code, but I haven't figured out how to optimize it further.

- inline kmem_find_general_cachep()
- optimize kmem_find_general_cachep() and document the optimization.
- remove duplicate code from __kmalloc(): call kmem_find_general_cachep()
  directly.

Based on a patch from Renaud Lienhart

Signed-off-by: Manfred Spraul
Signed-off-by: Andrew Morton
---

 25-akpm/mm/slab.c |   52 +++++++++++++++++++++++++---------------------------
 1 files changed, 25 insertions(+), 27 deletions(-)

diff -puN mm/slab.c~slab-kmalloc-cleanups mm/slab.c
--- 25/mm/slab.c~slab-kmalloc-cleanups	Fri Mar 11 16:03:16 2005
+++ 25-akpm/mm/slab.c	Fri Mar 11 16:03:16 2005
@@ -509,7 +509,7 @@ static int slab_break_gfp_order = BREAK_
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
-	{ 0, }
+	CACHE(ULONG_MAX)
 #undef CACHE
 };
 
@@ -586,20 +586,28 @@ static inline struct array_cache *ac_dat
 	return cachep->array[smp_processor_id()];
 }
 
-static kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+static inline kmem_cache_t *kmem_find_general_cachep (size_t size, int gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
-	/* This function could be moved to the header file, and
-	 * made inline so consumers can quickly determine what
-	 * cache pointer they require.
+#if DEBUG
+	/* This happens if someone tries to call
+	 * kmem_cache_create(), or __kmalloc(), before
+	 * the generic caches are initialized.
+	 */
+	BUG_ON(csizep->cs_cachep == NULL);
+#endif
+	while (size > csizep->cs_size)
+		csizep++;
+
+	/*
+	 * Really subtile: The last entry with cs->cs_size==ULONG_MAX
+	 * has cs_{dma,}cachep==NULL. Thus no special case
+	 * for large kmalloc calls required.
 	 */
-	for ( ; csizep->cs_size; csizep++) {
-		if (size > csizep->cs_size)
-			continue;
-		break;
-	}
-	return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+	if (unlikely(gfpflags & GFP_DMA))
+		return csizep->cs_dmacachep;
+	return csizep->cs_cachep;
 }
 
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
@@ -798,7 +806,7 @@ void __init kmem_cache_init(void)
 	sizes = malloc_sizes;
 	names = cache_names;
 
-	while (sizes->cs_size) {
+	while (sizes->cs_size != ULONG_MAX) {
 		/* For performance, all the general caches are L1 aligned.
 		 * This should be particularly beneficial on SMP boxes, as it
 		 * eliminates "false sharing".
@@ -2461,22 +2469,12 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  */
 void * __kmalloc (size_t size, int flags)
 {
-	struct cache_sizes *csizep = malloc_sizes;
+	kmem_cache_t *cachep;
 
-	for (; csizep->cs_size; csizep++) {
-		if (size > csizep->cs_size)
-			continue;
-#if DEBUG
-		/* This happens if someone tries to call
-		 * kmem_cache_create(), or kmalloc(), before
-		 * the generic caches are initialized.
-		 */
-		BUG_ON(csizep->cs_cachep == NULL);
-#endif
-		return __cache_alloc(flags & GFP_DMA ?
-				csizep->cs_dmacachep : csizep->cs_cachep, flags);
-	}
-	return NULL;
+	cachep = kmem_find_general_cachep(size, flags);
+	if (unlikely(cachep == NULL))
+		return NULL;
+	return __cache_alloc(cachep, flags);
 }
 EXPORT_SYMBOL(__kmalloc);
 
_
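
For illustration only (not part of the patch): a minimal user-space sketch of
the sentinel-terminated lookup that kmem_find_general_cachep() switches to.
The struct, cache names and sizes below are made up for the example; only the
loop shape mirrors the kernel code.  Because the table ends with a ULONG_MAX
entry whose cache pointers are NULL, the search loop needs no end-of-array
test, and oversized requests naturally fall through to NULL - the same
reasoning the patch documents.

    /* Standalone demo of the ULONG_MAX sentinel lookup (illustrative names). */
    #include <stdio.h>
    #include <stddef.h>
    #include <limits.h>

    struct demo_cache_size {
            unsigned long cs_size;
            const char *cs_name;    /* stands in for cs_cachep/cs_dmacachep */
    };

    /* The table ends with a ULONG_MAX sentinel that has no cache behind it. */
    static struct demo_cache_size sizes[] = {
            {        32, "size-32"  },
            {        64, "size-64"  },
            {       128, "size-128" },
            { ULONG_MAX, NULL       },
    };

    static const char *demo_find_general_cachep(size_t size)
    {
            struct demo_cache_size *csizep = sizes;

            /*
             * No end-of-array check needed: size cannot exceed ULONG_MAX,
             * so the loop always stops at the sentinel.  Requests larger
             * than the biggest real cache land on the sentinel and return
             * NULL, which is why __kmalloc() needs no special case for them.
             */
            while (size > csizep->cs_size)
                    csizep++;
            return csizep->cs_name;
    }

    int main(void)
    {
            const char *c;

            printf("40     -> %s\n", demo_find_general_cachep(40));   /* size-64  */
            printf("128    -> %s\n", demo_find_general_cachep(128));  /* size-128 */
            c = demo_find_general_cachep(100000);
            printf("100000 -> %s\n", c ? c : "NULL (too large)");
            return 0;
    }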