diff options
author | Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | 2024-03-20 11:26:33 -0400 |
---|---|---|
committer | Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | 2024-03-20 11:26:33 -0400 |
commit | 805d0043db4c6d645a783c1a994d2d43a8e946e1 (patch) | |
tree | 63fed658762c55b7744d46cfc6c2ce27c0387574 | |
parent | cabbbc8e64c53932e4fd570054ffb038d7806e36 (diff) | |
download | librseq-805d0043db4c6d645a783c1a994d2d43a8e946e1.tar.gz |
mempool: Rename pool policy
Rename the pool populate policies as follows:
  RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE -> RSEQ_MEMPOOL_POPULATE_COW_INIT
  RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL -> RSEQ_MEMPOOL_POPULATE_COW_ZERO
This makes it clear to the user that the COW_ZERO pool policy benefits from
leaving the memory zero-allocated.
Also, now that zmalloc checks the prior content for zero to prevent
useless COW of the per-cpu ranges, the term "populate all" does not fit
well anymore. Copy-on-write from the zero page is clearer.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: If97984225d971a2f48ba241b2a9570e81079bd03
-rw-r--r-- | include/rseq/mempool.h | 38 | ||||
-rw-r--r-- | src/rseq-mempool.c | 95 | ||||
-rw-r--r-- | tests/mempool_cow_race_test.c | 2 | ||||
-rw-r--r-- | tests/mempool_test.c | 34 |
4 files changed, 101 insertions, 68 deletions
diff --git a/include/rseq/mempool.h b/include/rseq/mempool.h index 8e9db5d..5f5e0a6 100644 --- a/include/rseq/mempool.h +++ b/include/rseq/mempool.h @@ -421,7 +421,6 @@ void *rseq_mempool_set_malloc_init(struct rseq_mempool_set *pool_set, void *init return (void *) rseq_mempool_set_percpu_malloc_init(pool_set, init_ptr, len); } - /* * rseq_mempool_init_numa: Move pages to the NUMA node associated to their CPU topology. * @@ -485,7 +484,8 @@ int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr, * There is a marginal runtime overhead on malloc/free operations. * * The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT - * bytes, over the lifetime of the pool. + * bytes, plus one additional stride range for a separate free list, + * over the lifetime of the pool. * * Returns 0 on success, -1 with errno=EINVAL if arguments are invalid. */ @@ -540,24 +540,28 @@ int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr, enum rseq_mempool_populate_policy { /* - * RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE (default): - * Do not populate pages for any of the CPUs when creating the - * mempool. Rely on copy-on-write (COW) of per-cpu pages to - * populate per-cpu pages from the initial values pages on - * first write. This mempool is only meant for single-process - * use (private mapping). Note that this type of pool cannot - * be accessed from children processes across fork. It is - * however valid to destroy a pool from a child process after - * a fork to free its remaining resources. + * RSEQ_MEMPOOL_POPULATE_COW_INIT (default): + * Rely on copy-on-write (COW) of per-cpu pages to populate + * per-cpu pages from the initial values pages on first write. + * Note that this type of pool cannot be accessed from + * children processes across fork. It is however valid to + * destroy a pool from a child process after a fork to free + * its remaining resources. 
*/ - RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE = 0, + RSEQ_MEMPOOL_POPULATE_COW_INIT = 0, + /* - * RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL: - * Populate pages for all CPUs from 0 to (max_nr_cpus - 1) - * when creating the mempool. This mempool is only meant for - * single-process use (private mapping). + * RSEQ_MEMPOOL_POPULATE_COW_ZERO: + * Rely on copy-on-write (COW) of per-cpu pages to populate + * per-cpu pages from the zero page on first write. As long + * as the user only uses malloc, zmalloc, or malloc_init with + * zeroed content to allocate items, it does not trigger COW + * of per-cpu pages, leaving in place the zero page until an + * active CPU writes to its per-cpu item. The recommended (and + * default) poison value for this pool policy is 0 to prevent + * useless COW page allocation. */ - RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL = 1, + RSEQ_MEMPOOL_POPULATE_COW_ZERO = 1, }; /* diff --git a/src/rseq-mempool.c b/src/rseq-mempool.c index a62cf7f..c5b4617 100644 --- a/src/rseq-mempool.c +++ b/src/rseq-mempool.c @@ -62,6 +62,10 @@ # define DEFAULT_COW_INIT_POISON_VALUE 0x55555555UL #endif +/* + * Define the default COW_ZERO poison value as zero to prevent useless + * COW page allocation when writing poison values when freeing items. + */ #define DEFAULT_COW_ZERO_POISON_VALUE 0x0 struct free_list_node; @@ -102,28 +106,29 @@ struct rseq_mempool_range { /* * Memory layout of a mempool range: - * - Canary header page (for destroy-after-fork detection), + * - Canary header page (for detection of destroy-after-fork of + * COW_INIT pool), * - Header page (contains struct rseq_mempool_range at the * very end), * - Base of the per-cpu data, starting with CPU 0. - * Aliases with free-list for non-robust populate all pool. + * Aliases with free-list for non-robust COW_ZERO pool. * - CPU 1, * ... * - CPU max_nr_cpus - 1 - * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL). - * Aliases with free-list for non-robust populate none pool. 
+ * - init values (only allocated for COW_INIT pool). + * Aliases with free-list for non-robust COW_INIT pool. * - free list (for robust pool). * * The free list aliases the CPU 0 memory area for non-robust - * populate all pools. It aliases with init values for - * non-robust populate none pools. It is located immediately - * after the init values for robust pools. + * COW_ZERO pools. It aliases with init values for non-robust + * COW_INIT pools. It is located immediately after the init + * values for robust pools. */ void *header; void *base; /* * The init values contains malloc_init/zmalloc values. - * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL. + * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_COW_ZERO. */ void *init; size_t next_unused; @@ -145,11 +150,21 @@ struct rseq_mempool { int item_order; /* - * The free list chains freed items on the CPU 0 address range. - * We should rethink this decision if false sharing between - * malloc/free from other CPUs and data accesses from CPU 0 - * becomes an issue. This is a NULL-terminated singly-linked - * list. + * COW_INIT non-robust pools: + * The free list chains freed items on the init + * values address range. + * + * COW_ZERO non-robust pools: + * The free list chains freed items on the CPU 0 + * address range. We should rethink this + * decision if false sharing between malloc/free + * from other CPUs and data accesses from CPU 0 + * becomes an issue. + * + * Robust pools: The free list chains freed items in the + * address range dedicated for the free list. + * + * This is a NULL-terminated singly-linked list. */ struct free_list_node *free_list_head; @@ -203,12 +218,12 @@ void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *po /* Skip cpus. 
*/ p -= pool->attr.max_nr_cpus * pool->attr.stride; /* Skip init values */ - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) p -= pool->attr.stride; } else { - /* Populate none free list is in init values */ - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) p -= pool->attr.max_nr_cpus * pool->attr.stride; } return p; @@ -222,12 +237,12 @@ struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool /* Skip cpus. */ p += pool->attr.max_nr_cpus * pool->attr.stride; /* Skip init values */ - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) p += pool->attr.stride; } else { - /* Populate none free list is in init values */ - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + /* COW_INIT free list is in init values */ + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) p += pool->attr.max_nr_cpus * pool->attr.stride; } return (struct free_list_node *) p; @@ -272,7 +287,7 @@ void rseq_percpu_zero_item(struct rseq_mempool *pool, * write to the page. This eliminates useless COW over * the zero page just for overwriting it with zeroes. * - * This means zmalloc() in populate all policy pool do + * This means zmalloc() in COW_ZERO policy pool do * not trigger COW for CPUs which are not actively * writing to the pool. This is however not the case for * malloc_init() in populate-all pools if it populates @@ -342,8 +357,8 @@ void rseq_percpu_poison_item(struct rseq_mempool *pool, * COW of the page. * * It is recommended to use zero as poison value for - * populate-all pools to eliminate COW due to writing - * poison to unused CPU memory. 
+ * COW_ZERO pools to eliminate COW due to writing + * poison to CPU memory still backed by the zero page. */ if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0) continue; @@ -741,10 +756,10 @@ struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool) header_len = POOL_HEADER_NR_PAGES * page_size; range_len = pool->attr.stride * pool->attr.max_nr_cpus; - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) range_len += pool->attr.stride; /* init values */ if (pool->attr.robust_set) - range_len += pool->attr.stride; /* free list */ + range_len += pool->attr.stride; /* dedicated free list */ base = aligned_mmap_anonymous(page_size, range_len, pool->attr.stride, &header, header_len); if (!base) @@ -756,7 +771,7 @@ struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool) range->mmap_addr = header; range->mmap_len = header_len + range_len; - if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) { + if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) { range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus); /* Populate init values pages from memfd */ memfd = rseq_memfd_create_init(pool->name, pool->attr.stride); @@ -853,7 +868,7 @@ bool pool_mappings_accessible(struct rseq_mempool *pool) size_t page_size; char *addr; - if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_COW_INIT) return true; range = pool->range_list; if (!range) @@ -884,8 +899,8 @@ int rseq_mempool_destroy(struct rseq_mempool *pool) /* * Validate that the pool mappings are accessible before doing * free list/poison validation and unmapping ranges. This allows - * calling pool destroy in child process after a fork for - * populate-none pools to free pool resources. 
+ * calling pool destroy in child process after a fork for COW_INIT + * pools to free pool resources. */ mapping_accessible = pool_mappings_accessible(pool); @@ -928,6 +943,19 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name, if (_attr) memcpy(&attr, _attr, sizeof(attr)); + /* + * Validate that the pool populate policy requested is known. + */ + switch (attr.populate_policy) { + case RSEQ_MEMPOOL_POPULATE_COW_INIT: + break; + case RSEQ_MEMPOOL_POPULATE_COW_ZERO: + break; + default: + errno = EINVAL; + return NULL; + } + switch (attr.type) { case MEMPOOL_TYPE_PERCPU: if (attr.max_nr_cpus < 0) { @@ -945,8 +973,8 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name, break; case MEMPOOL_TYPE_GLOBAL: /* Override populate policy for global type. */ - if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE) - attr.populate_policy = RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL; + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) + attr.populate_policy = RSEQ_MEMPOOL_POPULATE_COW_ZERO; /* Use a 1-cpu pool for global mempool type. */ attr.max_nr_cpus = 1; break; @@ -955,7 +983,7 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name, attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */ if (attr.robust_set && !attr.poison_set) { attr.poison_set = true; - if (attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) attr.poison = DEFAULT_COW_INIT_POISON_VALUE; else attr.poison = DEFAULT_COW_ZERO_POISON_VALUE; @@ -1140,8 +1168,8 @@ void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride) item = __rseq_percpu_to_free_list_ptr(pool, _ptr); /* * Setting the next pointer will overwrite the first uintptr_t - * poison for either CPU 0 (populate all) or init data (populate - * none). + * poison for either CPU 0 (COW_ZERO, non-robust), or init data + * (COW_INIT, non-robust). 
*/ item->next = head; pool->free_list_head = item; @@ -1276,6 +1304,7 @@ int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr, attr->init_set = true; attr->init_func = init_func; attr->init_priv = init_priv; + attr->populate_policy = RSEQ_MEMPOOL_POPULATE_COW_INIT; return 0; } diff --git a/tests/mempool_cow_race_test.c b/tests/mempool_cow_race_test.c index c086c4d..54e6193 100644 --- a/tests/mempool_cow_race_test.c +++ b/tests/mempool_cow_race_test.c @@ -79,7 +79,7 @@ static void *test_init_thread(void *arg) ret = rseq_mempool_attr_set_max_nr_ranges(attr, 1); if (ret) abort(); - ret = rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE); + ret = rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_COW_INIT); if (ret) abort(); mempool = rseq_mempool_create("test_data", sizeof(struct test_data), attr); diff --git a/tests/mempool_test.c b/tests/mempool_test.c index 2816831..42654dd 100644 --- a/tests/mempool_test.c +++ b/tests/mempool_test.c @@ -67,7 +67,7 @@ static void test_mempool_fill(enum rseq_mempool_populate_policy policy, ok(ret == 0, "Setting mempool poison"); ret = rseq_mempool_attr_set_populate_policy(attr, policy); ok(ret == 0, "Setting mempool populate policy to %s", - policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE ? "NONE" : "ALL"); + policy == RSEQ_MEMPOOL_POPULATE_COW_INIT ? "COW_INIT" : "COW_ZERO"); mempool = rseq_mempool_create("test_data", sizeof(struct test_data), attr); ok(mempool, "Create mempool of size %zu", stride); @@ -156,10 +156,10 @@ static void test_robust_corrupt_after_free(struct rseq_mempool *pool, ptr = (struct test_data __rseq_percpu *) rseq_mempool_percpu_malloc(pool); /* * Corrupt free list: For robust pools, the free list is located - * after the last cpu memory range for populate all, and after - * the init values memory range for populate none. + * after the last cpu memory range for COW_ZERO, and after the init + * values memory range for COW_INIT. 
*/ - if (policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (policy == RSEQ_MEMPOOL_POPULATE_COW_ZERO) cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool)); else cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool) + 1); @@ -187,10 +187,10 @@ static void test_robust_free_list_corruption(struct rseq_mempool *pool, ptr = (struct test_data __rseq_percpu *) rseq_mempool_percpu_malloc(pool); /* * Corrupt free list: For robust pools, the free list is located - * after the last cpu memory range for populate all, and after - * the init values memory range for populate none. + * after the last cpu memory range for COW_ZERO, and after the init + * values memory range for COW_INIT. */ - if (policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) + if (policy == RSEQ_MEMPOOL_POPULATE_COW_ZERO) cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool)); else cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool) + 1); @@ -379,8 +379,8 @@ int main(void) for (nr_ranges = 1; nr_ranges < 32; nr_ranges <<= 1) { /* From page size to 64kB */ for (len = rseq_get_page_len(); len < 65536; len <<= 1) { - test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL, nr_ranges, len); - test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE, nr_ranges, len); + test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_ZERO, nr_ranges, len); + test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_INIT, nr_ranges, len); } } @@ -389,16 +389,16 @@ int main(void) len = 65536; /* From min(page size, 64kB) to 4MB */ for (; len < 4096 * 1024; len <<= 1) { - test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL, 1, len); - test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE, 1, len); + test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_ZERO, 1, len); + test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_INIT, 1, len); } - run_robust_tests(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL); - run_robust_tests(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE); - 
ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL), - "fork destroy pool test populate private all"); - ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE), - "fork destroy pool test populate private none"); + run_robust_tests(RSEQ_MEMPOOL_POPULATE_COW_ZERO); + run_robust_tests(RSEQ_MEMPOOL_POPULATE_COW_INIT); + ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_COW_ZERO), + "fork destroy pool test populate COW_ZERO"); + ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_COW_INIT), + "fork destroy pool test populate COW_INIT"); exit(exit_status()); } |