From: Brent Casavant The following patch modifies the dentry cache and inode cache to enable the use of vmalloc to alleviate boottime memory allocation imbalances on NUMA systems, utilizing flags to the alloc_large_system_hash routine in order to centralize the enabling of this behavior. In general, for each hash, we check at the early allocation point whether hash distribution is enabled, and if so we defer allocation. At the late allocation point we perform the allocation if it was not earlier deferred. These late allocation points are the same points utilized prior to the addition of alloc_large_system_hash to the kernel. Signed-off-by: Brent Casavant Signed-off-by: Andrew Morton --- 25-akpm/fs/dcache.c | 25 +++++++++++++++++++++++++ 25-akpm/fs/inode.c | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff -puN fs/dcache.c~filesystem-hashes-numa-interleaving fs/dcache.c --- 25/fs/dcache.c~filesystem-hashes-numa-interleaving Thu Dec 23 13:54:43 2004 +++ 25-akpm/fs/dcache.c Thu Dec 23 13:54:43 2004 @@ -1574,6 +1574,12 @@ static void __init dcache_init_early(voi { int loop; + /* If hashes are distributed across NUMA nodes, defer + * hash allocation until vmalloc space is available.
+ */ + if (hashdist) + return; + dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_head), @@ -1590,6 +1596,8 @@ static void __init dcache_init_early(voi static void __init dcache_init(unsigned long mempages) { + int loop; + /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature @@ -1602,6 +1610,23 @@ static void __init dcache_init(unsigned NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); + + /* Hash may have been set up in dcache_init_early */ + if (!hashdist) + return; + + dentry_hashtable = + alloc_large_system_hash("Dentry cache", + sizeof(struct hlist_head), + dhash_entries, + 13, + 0, + &d_hash_shift, + &d_hash_mask, + 0); + + for (loop = 0; loop < (1 << d_hash_shift); loop++) + INIT_HLIST_HEAD(&dentry_hashtable[loop]); } /* SLAB cache for __getname() consumers */ diff -puN fs/inode.c~filesystem-hashes-numa-interleaving fs/inode.c --- 25/fs/inode.c~filesystem-hashes-numa-interleaving Thu Dec 23 13:54:43 2004 +++ 25-akpm/fs/inode.c Thu Dec 23 13:54:43 2004 @@ -1331,6 +1331,12 @@ void __init inode_init_early(void) { int loop; + /* If hashes are distributed across NUMA nodes, defer + * hash allocation until vmalloc space is available.
+ */ + if (hashdist) + return; + inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), @@ -1347,10 +1353,29 @@ void __init inode_init_early(void) void __init inode_init(unsigned long mempages) { + int loop; + /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), 0, SLAB_PANIC, init_once, NULL); set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); + + /* Hash may have been set up in inode_init_early */ + if (!hashdist) + return; + + inode_hashtable = + alloc_large_system_hash("Inode-cache", + sizeof(struct hlist_head), + ihash_entries, + 14, + 0, + &i_hash_shift, + &i_hash_mask, + 0); + + for (loop = 0; loop < (1 << i_hash_shift); loop++) + INIT_HLIST_HEAD(&inode_hashtable[loop]); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) _