From: Adam Litke The patch allows the kernel to determine if a shared memory request is suitable for huge pages. This way apps (such as DB2) benefit from huge pages without a recompile. Of course applications can still request huge pages explicitly. diff -purN linux-2.6.0-test4-virgin/include/linux/sysctl.h linux-2.6.0-test4-hugetlb/include/linux/sysctl.h --- linux-2.6.0-test4-virgin/include/linux/sysctl.h 2003-09-03 13:34:53.000000000 -0700 +++ linux-2.6.0-test4-hugetlb/include/linux/sysctl.h 2003-09-19 16:23:45.000000000 -0700 @@ -127,6 +127,8 @@ enum KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ + KERN_SHMUSEHUGEPAGES=60, /* int: use bigpages wherever possible */ + KERN_HPAGES_PER_FILE=61, /* int: max bigpages per file */ }; diff -purN linux-2.6.0-test4-virgin/ipc/shm.c linux-2.6.0-test4-hugetlb/ipc/shm.c --- linux-2.6.0-test4-virgin/ipc/shm.c 2003-08-08 21:36:49.000000000 -0700 +++ linux-2.6.0-test4-hugetlb/ipc/shm.c 2003-09-23 11:00:23.000000000 -0700 @@ -32,6 +32,9 @@ #define shm_flags shm_perm.mode +extern int shm_use_hugepages; +extern int shm_hugepages_per_file; + static struct file_operations shm_file_operations; static struct vm_operations_struct shm_vm_ops; @@ -194,8 +197,13 @@ static int newseg (key_t key, int shmflg return error; } - if (shmflg & SHM_HUGETLB) + if (shm_use_hugepages && (size & HPAGE_MASK) && + ((size >> HPAGE_SHIFT) <= shm_hugepages_per_file)) + shmflg |= SHM_HUGETLB; + + if (shmflg & SHM_HUGETLB) { file = hugetlb_zero_setup(size); + } else { sprintf (name, "SYSV%08x", key); file = shmem_file_setup(name, size, VM_ACCOUNT); diff -purN linux-2.6.0-test4-virgin/kernel/sysctl.c linux-2.6.0-test4-hugetlb/kernel/sysctl.c --- linux-2.6.0-test4-virgin/kernel/sysctl.c 2003-08-08 21:32:33.000000000 -0700 +++ linux-2.6.0-test4-hugetlb/kernel/sysctl.c 2003-09-22 10:16:54.000000000 -0700 @@ -58,6 +58,7 @@ extern 
int cad_pid; extern int pid_max; extern int sysctl_lower_zone_protection; extern int min_free_kbytes; +extern int shm_use_hugepages, shm_hugepages_per_file; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -580,6 +581,24 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_HUGETLBFS + { + .ctl_name = KERN_SHMUSEHUGEPAGES, + .procname = "shm-use-bigpages", + .data = &shm_use_hugepages, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = KERN_HPAGES_PER_FILE, + .procname = "shm-bigpages-per-file", + .data = &shm_hugepages_per_file, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = 0 } }; diff -purN linux-2.6.0-test4-virgin/mm/shmem.c linux-2.6.0-test4-hugetlb/mm/shmem.c --- linux-2.6.0-test4-virgin/mm/shmem.c 2003-09-03 13:34:53.000000000 -0700 +++ linux-2.6.0-test4-hugetlb/mm/shmem.c 2003-09-19 16:13:29.000000000 -0700 @@ -40,6 +40,27 @@ #include #include +int shm_use_hugepages; + +/* + * On 64bit archs the vmalloc area is very large, + * so we allocate the array in vmalloc on 64bit archs. + * + * Assuming 2M pages (x86 and x86-64), these default settings + * will allow up to 128G of bigpages in a single file on + * 64bit archs and 64G on 32bit archs using the max + * kmalloc size of 128k. So tweaking in practice is needed + * only to go past 128G of bigpages per file on 64bit archs. + * + * This sysctl is in huge page units (each page is 1 << HPAGE_SHIFT bytes). + */ +#if BITS_PER_LONG == 64 +int shm_hugepages_per_file = 128UL << (30 - HPAGE_SHIFT); +#else +int shm_hugepages_per_file = 131072 / sizeof(struct page *); +#endif + + /* This magic number is used in glibc for posix shared memory */ #define TMPFS_MAGIC 0x01021994