aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2023-03-25 19:52:52 -0700
committerAlexei Starovoitov <ast@kernel.org>2023-03-25 19:52:52 -0700
commit8d2759608c1f39fc2d60702bf0a7421be246d209 (patch)
treec5ac30ad68786fa9636d46cb2dbb8d1f36fccc04
parente99360762a9cbd93bf1d352e90df5df78daa2f90 (diff)
parentcbe9d93d58b16b5912498ea42b5173022fff7c04 (diff)
downloadbpf-8d2759608c1f39fc2d60702bf0a7421be246d209.tar.gz
Merge branch 'bpf: Use bpf_mem_cache_alloc/free in bpf_local_storage'
Martin KaFai Lau says: ==================== From: Martin KaFai Lau <martin.lau@kernel.org> This set is a continuation of the effort in using bpf_mem_cache_alloc/free in bpf_local_storage [1] Major change is only using bpf_mem_alloc for task and cgrp storage while sk and inode stay with kzalloc/kfree. The details is in patch 2. [1]: https://lore.kernel.org/bpf/20230308065936.1550103-1-martin.lau@linux.dev/ v3: - Only use bpf_mem_alloc for task and cgrp storage. - sk and inode storage stay with kzalloc/kfree. - Check NULL and add comments in bpf_mem_cache_raw_free() in patch 1. - Added test and benchmark for task storage. v2: - Added bpf_mem_cache_alloc_flags() and bpf_mem_cache_raw_free() to hide the internal data structure of the bpf allocator. - Fixed a typo bug in bpf_selem_free() - Simplified the test_local_storage test by directly using err returned from libbpf ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/bpf_local_storage.h7
-rw-r--r--include/linux/bpf_mem_alloc.h2
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c2
-rw-r--r--kernel/bpf/bpf_inode_storage.c2
-rw-r--r--kernel/bpf/bpf_local_storage.c223
-rw-r--r--kernel/bpf/bpf_task_storage.c2
-rw-r--r--kernel/bpf/memalloc.c59
-rw-r--r--net/core/bpf_sk_storage.c2
-rw-r--r--tools/testing/selftests/bpf/bench.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_local_storage.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bench_local_storage_create.c25
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c56
13 files changed, 472 insertions, 68 deletions
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index a34f61467a2f17..173ec7f43ed1e0 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
+#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
@@ -55,6 +56,9 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
+ struct bpf_mem_alloc selem_ma;
+ struct bpf_mem_alloc storage_ma;
+ bool bpf_ma;
};
struct bpf_local_storage_data {
@@ -122,7 +126,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache);
+ struct bpf_local_storage_cache *cache,
+ bool bpf_ma);
struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index a7104af61ab4dd..3929be5743f495 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_cache_raw_free(void *ptr);
+void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
#endif /* _BPF_MEM_ALLOC_H */
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index f5b016a5484de5..d17d5b694668e0 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -149,7 +149,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &cgroup_cache);
+ return bpf_local_storage_map_alloc(attr, &cgroup_cache, true);
}
static void cgroup_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 9a5f0515189884..e17ad581b9be60 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -199,7 +199,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &inode_cache);
+ return bpf_local_storage_map_alloc(attr, &inode_cache, false);
}
static void inode_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 351d991694cbe8..dab2ff4c99d95c 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -80,8 +80,24 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;
- selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
- gfp_flags | __GFP_NOWARN);
+ if (smap->bpf_ma) {
+ migrate_disable();
+ selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
+ migrate_enable();
+ if (selem)
+ /* Keep the original bpf_map_kzalloc behavior
+ * before started using the bpf_mem_cache_alloc.
+ *
+ * No need to use zero_map_value. The bpf_selem_free()
+ * only does bpf_mem_cache_free when there is
+ * no other bpf prog is using the selem.
+ */
+ memset(SDATA(selem)->data, 0, smap->map.value_size);
+ } else {
+ selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
+ gfp_flags | __GFP_NOWARN);
+ }
+
if (selem) {
if (value)
copy_map_value(&smap->map, SDATA(selem)->data, value);
@@ -95,33 +111,96 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
return NULL;
}
+/* rcu tasks trace callback for bpf_ma == false */
+static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
+{
+ struct bpf_local_storage *local_storage;
+
+ /* If RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree(), else do kfree_rcu().
+ */
+ local_storage = container_of(rcu, struct bpf_local_storage, rcu);
+ if (rcu_trace_implies_rcu_gp())
+ kfree(local_storage);
+ else
+ kfree_rcu(local_storage, rcu);
+}
+
static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
- kfree(local_storage);
+ bpf_mem_cache_raw_free(local_storage);
}
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
- /* If RCU Tasks Trace grace period implies RCU grace period, do
- * kfree(), else do kfree_rcu().
- */
if (rcu_trace_implies_rcu_gp())
bpf_local_storage_free_rcu(rcu);
else
call_rcu(rcu, bpf_local_storage_free_rcu);
}
+/* Handle bpf_ma == false */
+static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
+ bool vanilla_rcu)
+{
+ if (vanilla_rcu)
+ kfree_rcu(local_storage, rcu);
+ else
+ call_rcu_tasks_trace(&local_storage->rcu,
+ __bpf_local_storage_free_trace_rcu);
+}
+
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
- bool reuse_now)
+ struct bpf_local_storage_map *smap,
+ bool bpf_ma, bool reuse_now)
{
- if (!reuse_now)
+ if (!bpf_ma) {
+ __bpf_local_storage_free(local_storage, reuse_now);
+ return;
+ }
+
+ if (!reuse_now) {
call_rcu_tasks_trace(&local_storage->rcu,
bpf_local_storage_free_trace_rcu);
- else
+ return;
+ }
+
+ if (smap) {
+ migrate_disable();
+ bpf_mem_cache_free(&smap->storage_ma, local_storage);
+ migrate_enable();
+ } else {
+ /* smap could be NULL if the selem that triggered
+ * this 'local_storage' creation had been long gone.
+ * In this case, directly do call_rcu().
+ */
call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
+ }
+}
+
+/* rcu tasks trace callback for bpf_ma == false */
+static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+{
+ struct bpf_local_storage_elem *selem;
+
+ selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+ if (rcu_trace_implies_rcu_gp())
+ kfree(selem);
+ else
+ kfree_rcu(selem, rcu);
+}
+
+/* Handle bpf_ma == false */
+static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
+ bool vanilla_rcu)
+{
+ if (vanilla_rcu)
+ kfree_rcu(selem, rcu);
+ else
+ call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}
static void bpf_selem_free_rcu(struct rcu_head *rcu)
@@ -129,7 +208,7 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
struct bpf_local_storage_elem *selem;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- kfree(selem);
+ bpf_mem_cache_raw_free(selem);
}
static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
@@ -145,10 +224,23 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
bool reuse_now)
{
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- if (!reuse_now)
+
+ if (!smap->bpf_ma) {
+ __bpf_selem_free(selem, reuse_now);
+ return;
+ }
+
+ if (!reuse_now) {
call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
- else
- call_rcu(&selem->rcu, bpf_selem_free_rcu);
+ } else {
+ /* Instead of using the vanilla call_rcu(),
+ * bpf_mem_cache_free will be able to reuse selem
+ * immediately.
+ */
+ migrate_disable();
+ bpf_mem_cache_free(&smap->selem_ma, selem);
+ migrate_enable();
+ }
}
/* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -209,11 +301,47 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
return free_local_storage;
}
+static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *storage_smap,
+ struct bpf_local_storage_elem *selem)
+{
+
+ struct bpf_local_storage_map *selem_smap;
+
+ /* local_storage->smap may be NULL. If it is, get the bpf_ma
+ * from any selem in the local_storage->list. The bpf_ma of all
+ * local_storage and selem should have the same value
+ * for the same map type.
+ *
+ * If the local_storage->list is already empty, the caller will not
+ * care about the bpf_ma value also because the caller is not
+ * responsibile to free the local_storage.
+ */
+
+ if (storage_smap)
+ return storage_smap->bpf_ma;
+
+ if (!selem) {
+ struct hlist_node *n;
+
+ n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
+ bpf_rcu_lock_held());
+ if (!n)
+ return false;
+
+ selem = hlist_entry(n, struct bpf_local_storage_elem, snode);
+ }
+ selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+
+ return selem_smap->bpf_ma;
+}
+
static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
bool reuse_now)
{
+ struct bpf_local_storage_map *storage_smap;
struct bpf_local_storage *local_storage;
- bool free_local_storage = false;
+ bool bpf_ma, free_local_storage = false;
unsigned long flags;
if (unlikely(!selem_linked_to_storage_lockless(selem)))
@@ -222,6 +350,10 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
+ storage_smap = rcu_dereference_check(local_storage->smap,
+ bpf_rcu_lock_held());
+ bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem);
+
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
@@ -229,7 +361,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_local_storage)
- bpf_local_storage_free(local_storage, reuse_now);
+ bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
}
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -349,8 +481,15 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
- gfp_flags | __GFP_NOWARN);
+ if (smap->bpf_ma) {
+ migrate_disable();
+ storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
+ migrate_enable();
+ } else {
+ storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
+ gfp_flags | __GFP_NOWARN);
+ }
+
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -396,7 +535,7 @@ int bpf_local_storage_alloc(void *owner,
return 0;
uncharge:
- bpf_local_storage_free(storage, true);
+ bpf_local_storage_free(storage, smap, smap->bpf_ma, true);
mem_uncharge(smap, owner, sizeof(*storage));
return err;
}
@@ -609,11 +748,15 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
+ struct bpf_local_storage_map *storage_smap;
struct bpf_local_storage_elem *selem;
- bool free_storage = false;
+ bool bpf_ma, free_storage = false;
struct hlist_node *n;
unsigned long flags;
+ storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
+ bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);
+
/* Neither the bpf_prog nor the bpf_map's syscall
* could be modifying the local_storage->list now.
* Thus, no elem can be added to or deleted from the
@@ -641,7 +784,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_storage)
- bpf_local_storage_free(local_storage, true);
+ bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
}
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -654,13 +797,25 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
return usage;
}
+/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
+ * A deadlock free allocator is useful for storage that the bpf prog can easily
+ * get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf.
+ * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses
+ * memory immediately. To be reuse-immediate safe, the owner destruction
+ * code path needs to go through a rcu grace period before calling
+ * bpf_local_storage_destroy().
+ *
+ * When bpf_ma == false, the kmalloc and kfree are used.
+ */
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
- struct bpf_local_storage_cache *cache)
+ struct bpf_local_storage_cache *cache,
+ bool bpf_ma)
{
struct bpf_local_storage_map *smap;
unsigned int i;
u32 nbuckets;
+ int err;
smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
if (!smap)
@@ -675,8 +830,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
nbuckets, GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
- bpf_map_area_free(smap);
- return ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
+ goto free_smap;
}
for (i = 0; i < nbuckets; i++) {
@@ -687,8 +842,26 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
smap->elem_size = offsetof(struct bpf_local_storage_elem,
sdata.data[attr->value_size]);
+ smap->bpf_ma = bpf_ma;
+ if (bpf_ma) {
+ err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
+ if (err)
+ goto free_smap;
+
+ err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false);
+ if (err) {
+ bpf_mem_alloc_destroy(&smap->selem_ma);
+ goto free_smap;
+ }
+ }
+
smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
return &smap->map;
+
+free_smap:
+ kvfree(smap->buckets);
+ bpf_map_area_free(smap);
+ return ERR_PTR(err);
}
void bpf_local_storage_map_free(struct bpf_map *map,
@@ -754,6 +927,10 @@ void bpf_local_storage_map_free(struct bpf_map *map,
*/
synchronize_rcu();
+ if (smap->bpf_ma) {
+ bpf_mem_alloc_destroy(&smap->selem_ma);
+ bpf_mem_alloc_destroy(&smap->storage_ma);
+ }
kvfree(smap->buckets);
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index ab5bd1ef58c452..d1af0c8f9ce46f 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -309,7 +309,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &task_cache);
+ return bpf_local_storage_map_alloc(attr, &task_cache, true);
}
static void task_storage_map_free(struct bpf_map *map)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5fcdacbb84394b..410637c225fb71 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -121,15 +121,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
return entry;
}
-static void *__alloc(struct bpf_mem_cache *c, int node)
+static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
- /* Allocate, but don't deplete atomic reserves that typical
- * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
- * will allocate from the current numa node which is what we
- * want here.
- */
- gfp_t flags = GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT;
-
if (c->percpu_size) {
void **obj = kmalloc_node(c->percpu_size, flags, node);
void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
@@ -185,7 +178,12 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
*/
obj = __llist_del_first(&c->free_by_rcu);
if (!obj) {
- obj = __alloc(c, node);
+ /* Allocate, but don't deplete atomic reserves that typical
+ * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
+ * will allocate from the current numa node which is what we
+ * want here.
+ */
+ obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT);
if (!obj)
break;
}
@@ -676,3 +674,46 @@ void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr)
unit_free(this_cpu_ptr(ma->cache), ptr);
}
+
+/* Directly does a kfree() without putting 'ptr' back to the free_llist
+ * for reuse and without waiting for a rcu_tasks_trace gp.
+ * The caller must first go through the rcu_tasks_trace gp for 'ptr'
+ * before calling bpf_mem_cache_raw_free().
+ * It could be used when the rcu_tasks_trace callback does not have
+ * a hold on the original bpf_mem_alloc object that allocated the
+ * 'ptr'. This should only be used in the uncommon code path.
+ * Otherwise, the bpf_mem_alloc's free_llist cannot be refilled
+ * and may affect performance.
+ */
+void bpf_mem_cache_raw_free(void *ptr)
+{
+ if (!ptr)
+ return;
+
+ kfree(ptr - LLIST_NODE_SZ);
+}
+
+/* When flags == GFP_KERNEL, it signals that the caller will not cause
+ * deadlock when using kmalloc. bpf_mem_cache_alloc_flags() will use
+ * kmalloc if the free_llist is empty.
+ */
+void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
+{
+ struct bpf_mem_cache *c;
+ void *ret;
+
+ c = this_cpu_ptr(ma->cache);
+
+ ret = unit_alloc(c);
+ if (!ret && flags == GFP_KERNEL) {
+ struct mem_cgroup *memcg, *old_memcg;
+
+ memcg = get_memcg(c);
+ old_memcg = set_active_memcg(memcg);
+ ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+ }
+
+ return !ret ? NULL : ret + LLIST_NODE_SZ;
+}
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cb0f5a105b8941..085025c7130ae9 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -68,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
- return bpf_local_storage_map_alloc(attr, &sk_cache);
+ return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key,
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index dc3827c1f139cd..d9c080ac17967b 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -278,6 +278,7 @@ extern struct argp bench_local_storage_argp;
extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;
+extern struct argp bench_local_storage_create_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -288,6 +289,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_local_storage_rcu_tasks_trace_argp, 0,
"local_storage RCU Tasks Trace slowdown benchmark", 0 },
{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
+ { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
{},
};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index f8b2a640ccbebc..abb0321d4f346f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -3,19 +3,71 @@
#include <sys/types.h>
#include <sys/socket.h>
+#include <pthread.h>
+#include <argp.h>
#include "bench.h"
#include "bench_local_storage_create.skel.h"
-#define BATCH_SZ 32
-
struct thread {
- int fds[BATCH_SZ];
+ int *fds;
+ pthread_t *pthds;
+ int *pthd_results;
};
static struct bench_local_storage_create *skel;
static struct thread *threads;
-static long socket_errs;
+static long create_owner_errs;
+static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
+static int batch_sz = 32;
+
+enum {
+ ARG_BATCH_SZ = 9000,
+ ARG_STORAGE_TYPE = 9001,
+};
+
+static const struct argp_option opts[] = {
+ { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
+ "The number of storage creations in each batch" },
+ { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
+ "The type of local storage to test (socket or task)" },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ int ret;
+
+ switch (key) {
+ case ARG_BATCH_SZ:
+ ret = atoi(arg);
+ if (ret < 1) {
+ fprintf(stderr, "invalid batch-size\n");
+ argp_usage(state);
+ }
+ batch_sz = ret;
+ break;
+ case ARG_STORAGE_TYPE:
+ if (!strcmp(arg, "task")) {
+ storage_type = BPF_MAP_TYPE_TASK_STORAGE;
+ } else if (!strcmp(arg, "socket")) {
+ storage_type = BPF_MAP_TYPE_SK_STORAGE;
+ } else {
+ fprintf(stderr, "invalid storage-type (socket or task)\n");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_create_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
static void validate(void)
{
@@ -28,6 +80,8 @@ static void validate(void)
static void setup(void)
{
+ int i;
+
skel = bench_local_storage_create__open_and_load();
if (!skel) {
fprintf(stderr, "error loading skel\n");
@@ -35,10 +89,16 @@ static void setup(void)
}
skel->bss->bench_pid = getpid();
-
- if (!bpf_program__attach(skel->progs.socket_post_create)) {
- fprintf(stderr, "Error attaching bpf program\n");
- exit(1);
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ if (!bpf_program__attach(skel->progs.socket_post_create)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ } else {
+ if (!bpf_program__attach(skel->progs.fork)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
}
if (!bpf_program__attach(skel->progs.kmalloc)) {
@@ -52,6 +112,29 @@ static void setup(void)
fprintf(stderr, "cannot alloc thread_res\n");
exit(1);
}
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ struct thread *t = &threads[i];
+
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ t->fds = malloc(batch_sz * sizeof(*t->fds));
+ if (!t->fds) {
+ fprintf(stderr, "cannot alloc t->fds\n");
+ exit(1);
+ }
+ } else {
+ t->pthds = malloc(batch_sz * sizeof(*t->pthds));
+ if (!t->pthds) {
+ fprintf(stderr, "cannot alloc t->pthds\n");
+ exit(1);
+ }
+ t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
+ if (!t->pthd_results) {
+ fprintf(stderr, "cannot alloc t->pthd_results\n");
+ exit(1);
+ }
+ }
+ }
}
static void measure(struct bench_res *res)
@@ -65,20 +148,20 @@ static void *consumer(void *input)
return NULL;
}
-static void *producer(void *input)
+static void *sk_producer(void *input)
{
struct thread *t = &threads[(long)(input)];
int *fds = t->fds;
int i;
while (true) {
- for (i = 0; i < BATCH_SZ; i++) {
+ for (i = 0; i < batch_sz; i++) {
fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
if (fds[i] == -1)
- atomic_inc(&socket_errs);
+ atomic_inc(&create_owner_errs);
}
- for (i = 0; i < BATCH_SZ; i++) {
+ for (i = 0; i < batch_sz; i++) {
if (fds[i] != -1)
close(fds[i]);
}
@@ -87,6 +170,42 @@ static void *producer(void *input)
return NULL;
}
+static void *thread_func(void *arg)
+{
+ return NULL;
+}
+
+static void *task_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ pthread_t *pthds = t->pthds;
+ int *pthd_results = t->pthd_results;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
+ if (pthd_results[i])
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (!pthd_results[i])
+ pthread_join(pthds[i], NULL);;
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
+ return sk_producer(input);
+ else
+ return task_producer(input);
+}
+
static void report_progress(int iter, struct bench_res *res, long delta_ns)
{
double creates_per_sec, kmallocs_per_create;
@@ -123,14 +242,18 @@ static void report_final(struct bench_res res[], int res_cnt)
printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
creates_mean, creates_stddev, creates_mean / env.producer_cnt);
printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
- if (socket_errs || skel->bss->create_errs)
- printf("socket() errors %ld create_errs %ld\n", socket_errs,
+ if (create_owner_errs || skel->bss->create_errs)
+ printf("%s() errors %ld create_errs %ld\n",
+ storage_type == BPF_MAP_TYPE_SK_STORAGE ?
+ "socket" : "pthread_create",
+ create_owner_errs,
skel->bss->create_errs);
}
/* Benchmark performance of creating bpf local storage */
const struct bench bench_local_storage_create = {
.name = "local-storage-create",
+ .argp = &bench_local_storage_create_argp,
.validate = validate,
.setup = setup,
.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 563a9c746b7bc4..bcf2e1905ed71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -23,7 +23,7 @@ struct storage {
/* Fork and exec the provided rm binary and return the exit code of the
* forked process and its pid.
*/
-static int run_self_unlink(int *monitored_pid, const char *rm_path)
+static int run_self_unlink(struct local_storage *skel, const char *rm_path)
{
int child_pid, child_status, ret;
int null_fd;
@@ -35,7 +35,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
dup2(null_fd, STDERR_FILENO);
close(null_fd);
- *monitored_pid = getpid();
+ skel->bss->monitored_pid = getpid();
/* Use the copied /usr/bin/rm to delete itself
* /tmp/copy_of_rm /tmp/copy_of_rm.
*/
@@ -44,6 +44,7 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
exit(errno);
} else if (child_pid > 0) {
waitpid(child_pid, &child_status, 0);
+ ASSERT_EQ(skel->data->task_storage_result, 0, "task_storage_result");
return WEXITSTATUS(child_status);
}
@@ -133,7 +134,7 @@ void test_test_local_storage(void)
* unlink its executable. This operation should be denied by the loaded
* LSM program.
*/
- err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
+ err = run_self_unlink(skel, tmp_exec_path);
if (!ASSERT_EQ(err, EPERM, "run_self_unlink"))
goto close_prog_rmdir;
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
index 2814bab54d28f5..7c851c9d5e47ca 100644
--- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -22,6 +22,13 @@ struct {
__type(value, struct storage);
} sk_storage_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} task_storage_map SEC(".maps");
+
SEC("raw_tp/kmalloc")
int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
@@ -32,6 +39,24 @@ int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
return 0;
}
+SEC("tp_btf/sched_process_fork")
+int BPF_PROG(fork, struct task_struct *parent, struct task_struct *child)
+{
+ struct storage *stg;
+
+ if (parent->tgid != bench_pid)
+ return 0;
+
+ stg = bpf_task_storage_get(&task_storage_map, child, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index c8ba7207f5a580..bc8ea56671a16a 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -16,6 +16,7 @@ char _license[] SEC("license") = "GPL";
int monitored_pid = 0;
int inode_storage_result = -1;
int sk_storage_result = -1;
+int task_storage_result = -1;
struct local_storage {
struct inode *exec_inode;
@@ -50,26 +51,57 @@ struct {
__type(value, struct local_storage);
} task_storage_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} task_storage_map2 SEC(".maps");
+
SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct bpf_local_storage *local_storage;
struct local_storage *storage;
+ struct task_struct *task;
bool is_self_unlink;
if (pid != monitored_pid)
return 0;
- storage = bpf_task_storage_get(&task_storage_map,
- bpf_get_current_task_btf(), 0, 0);
- if (storage) {
- /* Don't let an executable delete itself */
- is_self_unlink = storage->exec_inode == victim->d_inode;
- if (is_self_unlink)
- return -EPERM;
- }
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
- return 0;
+ task_storage_result = -1;
+
+ storage = bpf_task_storage_get(&task_storage_map, task, 0, 0);
+ if (!storage)
+ return 0;
+
+ /* Don't let an executable delete itself */
+ is_self_unlink = storage->exec_inode == victim->d_inode;
+
+ storage = bpf_task_storage_get(&task_storage_map2, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage || storage->value)
+ return 0;
+
+ if (bpf_task_storage_delete(&task_storage_map, task))
+ return 0;
+
+ /* Ensure that the task_storage_map is disconnected from the storage.
+ * The storage memory should not be freed back to the
+ * bpf_mem_alloc.
+ */
+ local_storage = task->bpf_storage;
+ if (!local_storage || local_storage->smap)
+ return 0;
+
+ task_storage_result = 0;
+
+ return is_self_unlink ? -EPERM : 0;
}
SEC("lsm.s/inode_rename")
@@ -139,11 +171,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (bpf_sk_storage_delete(&sk_storage_map, sock->sk))
return 0;
- /* Ensure that the sk_storage_map is disconnected from the storage.
- * The storage memory should not be freed back to the
- * bpf_mem_alloc of the sk_bpf_storage_map because
- * sk_bpf_storage_map may have been gone.
- */
+ /* Ensure that the sk_storage_map is disconnected from the storage. */
if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap)
return 0;