author     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>  2024-03-20 11:26:33 -0400
committer  Mathieu Desnoyers <mathieu.desnoyers@efficios.com>  2024-03-20 11:26:33 -0400
commit     805d0043db4c6d645a783c1a994d2d43a8e946e1 (patch)
tree       63fed658762c55b7744d46cfc6c2ce27c0387574
parent     cabbbc8e64c53932e4fd570054ffb038d7806e36 (diff)
download   librseq-805d0043db4c6d645a783c1a994d2d43a8e946e1.tar.gz
mempool: Rename pool policy
Rename pool policy from:

  RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE -> RSEQ_MEMPOOL_POPULATE_COW_INIT
  RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL  -> RSEQ_MEMPOOL_POPULATE_COW_ZERO

to make it clear to the user that the COW_ZERO pool policy benefits from
leaving the memory zero-allocated.

Also, now that zmalloc checks the prior content for zero to prevent
useless COW of the per-cpu ranges, the term "populate all" does not fit
well anymore. Copy-on-write from the zero page is clearer.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: If97984225d971a2f48ba241b2a9570e81079bd03
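For context, a caller selects the renamed policy through the existing attribute API. A minimal sketch, not part of this commit: the rseq_mempool_attr_set_percpu() setup and the auto-detected CPU count (max_nr_cpus == 0) are assumptions, and error reporting is trimmed.

    #include <rseq/mempool.h>

    /* Sketch: create a per-cpu pool using the renamed COW_ZERO policy. */
    static struct rseq_mempool *create_cow_zero_pool(void)
    {
            struct rseq_mempool_attr *attr;
            struct rseq_mempool *pool = NULL;

            attr = rseq_mempool_attr_create();
            if (!attr)
                    return NULL;
            /* Assumption: max_nr_cpus == 0 requests auto-detection. */
            if (rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0))
                    goto end;
            /* Formerly RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL. */
            if (rseq_mempool_attr_set_populate_policy(attr,
                            RSEQ_MEMPOOL_POPULATE_COW_ZERO))
                    goto end;
            pool = rseq_mempool_create("example-pool", sizeof(long), attr);
    end:
            rseq_mempool_attr_destroy(attr);
            return pool;
    }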
-rw-r--r--  include/rseq/mempool.h         38
-rw-r--r--  src/rseq-mempool.c             95
-rw-r--r--  tests/mempool_cow_race_test.c   2
-rw-r--r--  tests/mempool_test.c           34
4 files changed, 101 insertions(+), 68 deletions(-)
diff --git a/include/rseq/mempool.h b/include/rseq/mempool.h
index 8e9db5d..5f5e0a6 100644
--- a/include/rseq/mempool.h
+++ b/include/rseq/mempool.h
@@ -421,7 +421,6 @@ void *rseq_mempool_set_malloc_init(struct rseq_mempool_set *pool_set, void *init
return (void *) rseq_mempool_set_percpu_malloc_init(pool_set, init_ptr, len);
}
-
/*
* rseq_mempool_init_numa: Move pages to the NUMA node associated to their CPU topology.
*
@@ -485,7 +484,8 @@ int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
* There is a marginal runtime overhead on malloc/free operations.
*
* The memory overhead is (pool->percpu_len / pool->item_len) / CHAR_BIT
- * bytes, over the lifetime of the pool.
+ * bytes, plus one additional stride range for a separate free list,
+ * over the lifetime of the pool.
*
* Returns 0 on success, -1 with errno=EINVAL if arguments are invalid.
*/
@@ -540,24 +540,28 @@ int rseq_mempool_attr_set_poison(struct rseq_mempool_attr *attr,
enum rseq_mempool_populate_policy {
/*
- * RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE (default):
- * Do not populate pages for any of the CPUs when creating the
- * mempool. Rely on copy-on-write (COW) of per-cpu pages to
- * populate per-cpu pages from the initial values pages on
- * first write. This mempool is only meant for single-process
- * use (private mapping). Note that this type of pool cannot
- * be accessed from children processes across fork. It is
- * however valid to destroy a pool from a child process after
- * a fork to free its remaining resources.
+ * RSEQ_MEMPOOL_POPULATE_COW_INIT (default):
+ * Rely on copy-on-write (COW) of per-cpu pages to populate
+ * per-cpu pages from the initial values pages on first write.
+ * Note that this type of pool cannot be accessed from
+ * child processes across fork. It is however valid to
+ * destroy a pool from a child process after a fork to free
+ * its remaining resources.
*/
- RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE = 0,
+ RSEQ_MEMPOOL_POPULATE_COW_INIT = 0,
+
/*
- * RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL:
- * Populate pages for all CPUs from 0 to (max_nr_cpus - 1)
- * when creating the mempool. This mempool is only meant for
- * single-process use (private mapping).
+ * RSEQ_MEMPOOL_POPULATE_COW_ZERO:
+ * Rely on copy-on-write (COW) of per-cpu pages to populate
+ * per-cpu pages from the zero page on first write. As long
+ * as the user only uses malloc, zmalloc, or malloc_init with
+ * zeroed content to allocate items, it does not trigger COW
+ * of per-cpu pages, leaving in place the zero page until an
+ * active CPU writes to its per-cpu item. The recommended (and
+ * default) poison value for this pool policy is 0 to prevent
+ * useless COW page allocation.
*/
- RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL = 1,
+ RSEQ_MEMPOOL_POPULATE_COW_ZERO = 1,
};
/*
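Per the policy documentation above, a COW_ZERO pool that also enables poisoning should keep the recommended zero poison value so that freed items stay backed by the zero page. A minimal sketch, assuming only the rseq_mempool_attr_set_poison() and rseq_mempool_attr_set_populate_policy() prototypes from this header:

    #include <rseq/mempool.h>

    /* Sketch: zero poison keeps frees from dirtying zero-page-backed memory. */
    static int set_cow_zero_attrs(struct rseq_mempool_attr *attr)
    {
            if (rseq_mempool_attr_set_populate_policy(attr,
                            RSEQ_MEMPOOL_POPULATE_COW_ZERO))
                    return -1;
            return rseq_mempool_attr_set_poison(attr, 0);
    }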
diff --git a/src/rseq-mempool.c b/src/rseq-mempool.c
index a62cf7f..c5b4617 100644
--- a/src/rseq-mempool.c
+++ b/src/rseq-mempool.c
@@ -62,6 +62,10 @@
# define DEFAULT_COW_INIT_POISON_VALUE 0x55555555UL
#endif
+/*
+ * Define the default COW_ZERO poison value as zero to prevent useless
+ * COW page allocation when poison values are written to freed items.
+ */
#define DEFAULT_COW_ZERO_POISON_VALUE 0x0
struct free_list_node;
@@ -102,28 +106,29 @@ struct rseq_mempool_range {
/*
* Memory layout of a mempool range:
- * - Canary header page (for destroy-after-fork detection),
+ * - Canary header page (for detection of destroy-after-fork of
+ * COW_INIT pool),
* - Header page (contains struct rseq_mempool_range at the
* very end),
* - Base of the per-cpu data, starting with CPU 0.
- * Aliases with free-list for non-robust populate all pool.
+ * Aliases with free-list for non-robust COW_ZERO pool.
* - CPU 1,
* ...
* - CPU max_nr_cpus - 1
- * - init values (unpopulated for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL).
- * Aliases with free-list for non-robust populate none pool.
+ * - init values (only allocated for COW_INIT pool).
+ * Aliases with free-list for non-robust COW_INIT pool.
* - free list (for robust pool).
*
* The free list aliases the CPU 0 memory area for non-robust
- * populate all pools. It aliases with init values for
- * non-robust populate none pools. It is located immediately
- * after the init values for robust pools.
+ * COW_ZERO pools. It aliases with init values for non-robust
+ * COW_INIT pools. It is located immediately after the init
+ * values for robust pools.
*/
void *header;
void *base;
/*
* The init values contains malloc_init/zmalloc values.
- * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL.
+ * Pointer is NULL for RSEQ_MEMPOOL_POPULATE_COW_ZERO.
*/
void *init;
size_t next_unused;
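The layout comment above implies the following placement arithmetic for the free-list area. This is an illustration derived from that comment, not the library's code; the parameters mirror the pool attribute fields used in this diff.

    #include <stddef.h>
    #include <stdbool.h>
    #include <rseq/mempool.h>

    /*
     * Sketch: locate the free-list area of one range, following the
     * layout comment above (base points at the CPU 0 stride).
     */
    static void *free_list_area(char *base, size_t stride, int max_nr_cpus,
                    bool robust, enum rseq_mempool_populate_policy policy)
    {
            if (!robust) {
                    if (policy == RSEQ_MEMPOOL_POPULATE_COW_ZERO)
                            return base;    /* aliases CPU 0 */
                    /* COW_INIT: aliases the init values range. */
                    return base + (size_t) max_nr_cpus * stride;
            }
            /* Robust: dedicated range after the cpus (and init values for COW_INIT). */
            if (policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
                    return base + ((size_t) max_nr_cpus + 1) * stride;
            return base + (size_t) max_nr_cpus * stride;
    }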
@@ -145,11 +150,21 @@ struct rseq_mempool {
int item_order;
/*
- * The free list chains freed items on the CPU 0 address range.
- * We should rethink this decision if false sharing between
- * malloc/free from other CPUs and data accesses from CPU 0
- * becomes an issue. This is a NULL-terminated singly-linked
- * list.
+ * COW_INIT non-robust pools:
+ * The free list chains freed items on the init
+ * values address range.
+ *
+ * COW_ZERO non-robust pools:
+ * The free list chains freed items on the CPU 0
+ * address range. We should rethink this
+ * decision if false sharing between malloc/free
+ * from other CPUs and data accesses from CPU 0
+ * becomes an issue.
+ *
+ * Robust pools: The free list chains freed items in the
+ * address range dedicated for the free list.
+ *
+ * This is a NULL-terminated singly-linked list.
*/
struct free_list_node *free_list_head;
@@ -203,12 +218,12 @@ void __rseq_percpu *__rseq_free_list_to_percpu_ptr(const struct rseq_mempool *po
/* Skip cpus. */
p -= pool->attr.max_nr_cpus * pool->attr.stride;
/* Skip init values */
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
p -= pool->attr.stride;
} else {
- /* Populate none free list is in init values */
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ /* COW_INIT free list is in init values */
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
p -= pool->attr.max_nr_cpus * pool->attr.stride;
}
return p;
@@ -222,12 +237,12 @@ struct free_list_node *__rseq_percpu_to_free_list_ptr(const struct rseq_mempool
/* Skip cpus. */
p += pool->attr.max_nr_cpus * pool->attr.stride;
/* Skip init values */
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
p += pool->attr.stride;
} else {
- /* Populate none free list is in init values */
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ /* COW_INIT free list is in init values */
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
p += pool->attr.max_nr_cpus * pool->attr.stride;
}
return (struct free_list_node *) p;
@@ -272,7 +287,7 @@ void rseq_percpu_zero_item(struct rseq_mempool *pool,
* write to the page. This eliminates useless COW over
* the zero page just for overwriting it with zeroes.
*
- * This means zmalloc() in populate all policy pool do
+ * This means zmalloc() in COW_ZERO policy pools do
* not trigger COW for CPUs which are not actively
* writing to the pool. This is however not the case for
* malloc_init() in populate-all pools if it populates
@@ -342,8 +357,8 @@ void rseq_percpu_poison_item(struct rseq_mempool *pool,
* COW of the page.
*
* It is recommended to use zero as poison value for
- * populate-all pools to eliminate COW due to writing
- * poison to unused CPU memory.
+ * COW_ZERO pools to eliminate COW due to writing
+ * poison to CPU memory still backed by the zero page.
*/
if (rseq_cmp_item(p, pool->item_len, poison, NULL) == 0)
continue;
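The compare-before-write idea used here can be sketched standalone. This is an illustration of the technique, not the library's rseq_cmp_item() implementation: with poison == 0, freeing an item that was never written performs no store, so the page stays on the zero page.

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: only store poison words that differ from the current content. */
    static void poison_item(uintptr_t *p, size_t item_len, uintptr_t poison)
    {
            size_t i;

            for (i = 0; i < item_len / sizeof(uintptr_t); i++) {
                    if (p[i] == poison)
                            continue;       /* already equal: no store, no COW */
                    p[i] = poison;
            }
    }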
@@ -741,10 +756,10 @@ struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
header_len = POOL_HEADER_NR_PAGES * page_size;
range_len = pool->attr.stride * pool->attr.max_nr_cpus;
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
range_len += pool->attr.stride; /* init values */
if (pool->attr.robust_set)
- range_len += pool->attr.stride; /* free list */
+ range_len += pool->attr.stride; /* dedicated free list */
base = aligned_mmap_anonymous(page_size, range_len,
pool->attr.stride, &header, header_len);
if (!base)
@@ -756,7 +771,7 @@ struct rseq_mempool_range *rseq_mempool_range_create(struct rseq_mempool *pool)
range->mmap_addr = header;
range->mmap_len = header_len + range_len;
- if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL) {
+ if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT) {
range->init = base + (pool->attr.stride * pool->attr.max_nr_cpus);
/* Populate init values pages from memfd */
memfd = rseq_memfd_create_init(pool->name, pool->attr.stride);
@@ -853,7 +868,7 @@ bool pool_mappings_accessible(struct rseq_mempool *pool)
size_t page_size;
char *addr;
- if (pool->attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (pool->attr.populate_policy != RSEQ_MEMPOOL_POPULATE_COW_INIT)
return true;
range = pool->range_list;
if (!range)
@@ -884,8 +899,8 @@ int rseq_mempool_destroy(struct rseq_mempool *pool)
/*
* Validate that the pool mappings are accessible before doing
* free list/poison validation and unmapping ranges. This allows
- * calling pool destroy in child process after a fork for
- * populate-none pools to free pool resources.
+ * calling pool destroy in a child process after a fork for COW_INIT
+ * pools to free pool resources.
*/
mapping_accessible = pool_mappings_accessible(pool);
@@ -928,6 +943,19 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name,
if (_attr)
memcpy(&attr, _attr, sizeof(attr));
+ /*
+ * Validate that the pool populate policy requested is known.
+ */
+ switch (attr.populate_policy) {
+ case RSEQ_MEMPOOL_POPULATE_COW_INIT:
+ break;
+ case RSEQ_MEMPOOL_POPULATE_COW_ZERO:
+ break;
+ default:
+ errno = EINVAL;
+ return NULL;
+ }
+
switch (attr.type) {
case MEMPOOL_TYPE_PERCPU:
if (attr.max_nr_cpus < 0) {
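The new validation is observable through the public API. A hedged sketch: the out-of-range value is hypothetical, and it assumes the attribute setter stores the policy unchecked, which is what the new check in rseq_mempool_create() suggests.

    #include <assert.h>
    #include <errno.h>
    #include <rseq/mempool.h>

    /* Sketch: an unknown policy value now makes pool creation fail with EINVAL. */
    static void check_policy_validation(struct rseq_mempool_attr *attr)
    {
            struct rseq_mempool *pool;

            if (rseq_mempool_attr_set_populate_policy(attr,
                            (enum rseq_mempool_populate_policy) 42))
                    return;
            pool = rseq_mempool_create("bad-policy", sizeof(long), attr);
            assert(!pool && errno == EINVAL);
    }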
@@ -945,8 +973,8 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name,
break;
case MEMPOOL_TYPE_GLOBAL:
/* Override populate policy for global type. */
- if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE)
- attr.populate_policy = RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL;
+ if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
+ attr.populate_policy = RSEQ_MEMPOOL_POPULATE_COW_ZERO;
/* Use a 1-cpu pool for global mempool type. */
attr.max_nr_cpus = 1;
break;
@@ -955,7 +983,7 @@ struct rseq_mempool *rseq_mempool_create(const char *pool_name,
attr.stride = RSEQ_MEMPOOL_STRIDE; /* Use default */
if (attr.robust_set && !attr.poison_set) {
attr.poison_set = true;
- if (attr.populate_policy != RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (attr.populate_policy == RSEQ_MEMPOOL_POPULATE_COW_INIT)
attr.poison = DEFAULT_COW_INIT_POISON_VALUE;
else
attr.poison = DEFAULT_COW_ZERO_POISON_VALUE;
@@ -1140,8 +1168,8 @@ void librseq_mempool_percpu_free(void __rseq_percpu *_ptr, size_t stride)
item = __rseq_percpu_to_free_list_ptr(pool, _ptr);
/*
* Setting the next pointer will overwrite the first uintptr_t
- * poison for either CPU 0 (populate all) or init data (populate
- * none).
+ * poison for either CPU 0 (COW_ZERO, non-robust), or init data
+ * (COW_INIT, non-robust).
*/
item->next = head;
pool->free_list_head = item;
@@ -1276,6 +1304,7 @@ int rseq_mempool_attr_set_init(struct rseq_mempool_attr *attr,
attr->init_set = true;
attr->init_func = init_func;
attr->init_priv = init_priv;
+ attr->populate_policy = RSEQ_MEMPOOL_POPULATE_COW_INIT;
return 0;
}
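With the hunk above, installing an init function now implies COW_INIT, since custom init values require the init pages. A sketch of a custom initializer; the callback signature is assumed from the librseq headers.

    #include <string.h>
    #include <rseq/mempool.h>

    /* Sketch: per-cpu initializer; installing it selects COW_INIT implicitly. */
    static int init_cpu_data(void *priv __attribute__((unused)),
                    void *addr, size_t len, int cpu)
    {
            memset(addr, 0, len);
            *(int *) addr = cpu;    /* e.g. stamp the owning cpu number */
            return 0;
    }

    static int install_initializer(struct rseq_mempool_attr *attr)
    {
            /* After this call, attr's populate policy is COW_INIT. */
            return rseq_mempool_attr_set_init(attr, init_cpu_data, NULL);
    }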
diff --git a/tests/mempool_cow_race_test.c b/tests/mempool_cow_race_test.c
index c086c4d..54e6193 100644
--- a/tests/mempool_cow_race_test.c
+++ b/tests/mempool_cow_race_test.c
@@ -79,7 +79,7 @@ static void *test_init_thread(void *arg)
ret = rseq_mempool_attr_set_max_nr_ranges(attr, 1);
if (ret)
abort();
- ret = rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE);
+ ret = rseq_mempool_attr_set_populate_policy(attr, RSEQ_MEMPOOL_POPULATE_COW_INIT);
if (ret)
abort();
mempool = rseq_mempool_create("test_data", sizeof(struct test_data), attr);
diff --git a/tests/mempool_test.c b/tests/mempool_test.c
index 2816831..42654dd 100644
--- a/tests/mempool_test.c
+++ b/tests/mempool_test.c
@@ -67,7 +67,7 @@ static void test_mempool_fill(enum rseq_mempool_populate_policy policy,
ok(ret == 0, "Setting mempool poison");
ret = rseq_mempool_attr_set_populate_policy(attr, policy);
ok(ret == 0, "Setting mempool populate policy to %s",
- policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE ? "NONE" : "ALL");
+ policy == RSEQ_MEMPOOL_POPULATE_COW_INIT ? "COW_INIT" : "COW_ZERO");
mempool = rseq_mempool_create("test_data",
sizeof(struct test_data), attr);
ok(mempool, "Create mempool of size %zu", stride);
@@ -156,10 +156,10 @@ static void test_robust_corrupt_after_free(struct rseq_mempool *pool,
ptr = (struct test_data __rseq_percpu *) rseq_mempool_percpu_malloc(pool);
/*
* Corrupt free list: For robust pools, the free list is located
- * after the last cpu memory range for populate all, and after
- * the init values memory range for populate none.
+ * after the last cpu memory range for COW_ZERO, and after the init
+ * values memory range for COW_INIT.
*/
- if (policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (policy == RSEQ_MEMPOOL_POPULATE_COW_ZERO)
cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool));
else
cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool) + 1);
@@ -187,10 +187,10 @@ static void test_robust_free_list_corruption(struct rseq_mempool *pool,
ptr = (struct test_data __rseq_percpu *) rseq_mempool_percpu_malloc(pool);
/*
* Corrupt free list: For robust pools, the free list is located
- * after the last cpu memory range for populate all, and after
- * the init values memory range for populate none.
+ * after the last cpu memory range for COW_ZERO, and after the init
+ * values memory range for COW_INIT.
*/
- if (policy == RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL)
+ if (policy == RSEQ_MEMPOOL_POPULATE_COW_ZERO)
cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool));
else
cpuptr = (struct test_data *) rseq_percpu_ptr(ptr, rseq_mempool_get_max_nr_cpus(pool) + 1);
@@ -379,8 +379,8 @@ int main(void)
for (nr_ranges = 1; nr_ranges < 32; nr_ranges <<= 1) {
/* From page size to 64kB */
for (len = rseq_get_page_len(); len < 65536; len <<= 1) {
- test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL, nr_ranges, len);
- test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE, nr_ranges, len);
+ test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_ZERO, nr_ranges, len);
+ test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_INIT, nr_ranges, len);
}
}
@@ -389,16 +389,16 @@ int main(void)
len = 65536;
/* From min(page size, 64kB) to 4MB */
for (; len < 4096 * 1024; len <<= 1) {
- test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL, 1, len);
- test_mempool_fill(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE, 1, len);
+ test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_ZERO, 1, len);
+ test_mempool_fill(RSEQ_MEMPOOL_POPULATE_COW_INIT, 1, len);
}
- run_robust_tests(RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL);
- run_robust_tests(RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE);
- ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_PRIVATE_ALL),
- "fork destroy pool test populate private all");
- ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_PRIVATE_NONE),
- "fork destroy pool test populate private none");
+ run_robust_tests(RSEQ_MEMPOOL_POPULATE_COW_ZERO);
+ run_robust_tests(RSEQ_MEMPOOL_POPULATE_COW_INIT);
+ ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_COW_ZERO),
+ "fork destroy pool test populate COW_ZERO");
+ ok(run_fork_destroy_pool_test(fork_child, RSEQ_MEMPOOL_POPULATE_COW_INIT),
+ "fork destroy pool test populate COW_INIT");
exit(exit_status());
}
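Tying the fork semantics together: as the header documentation above states, a COW_INIT pool cannot be used from a child process across fork, but the child may still destroy it to release its resources. A minimal sketch, with error handling for fork() trimmed:

    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <rseq/mempool.h>

    /* Sketch: child destroys the pool after fork; no allocations in the child. */
    static void fork_and_destroy(struct rseq_mempool *pool)
    {
            pid_t pid = fork();

            if (pid == 0)
                    _exit(rseq_mempool_destroy(pool) ?
                            EXIT_FAILURE : EXIT_SUCCESS);
            (void) waitpid(pid, NULL, 0);
    }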