author     Vlastimil Babka <vbabka@suse.cz>  2024-02-12 21:36:33 +0100
committer  Vlastimil Babka <vbabka@suse.cz>  2024-02-12 21:36:33 +0100
commit     e4d39d948cd5cea8989768ec08d7fdbd5171e918 (patch)
tree       decd73ccd5e12043e239fa8179a42451689335ae
parent     41bccc98fb7931d63d03f326a746ac4d429c1dd3 (diff)
download   linux-BROKEN-locking-bench.tar.gz

slub_kunit: hack in a stupid locking/atomics microbenchmark
-rw-r--r--  arch/x86/lib/cmpxchg16b_emu.S |   2
-rw-r--r--  lib/slub_kunit.c              | 245
2 files changed, 247 insertions, 0 deletions
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 6962df3157938d..d4ce28a2d8f9fb 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -52,3 +52,5 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+
+EXPORT_SYMBOL(this_cpu_cmpxchg16b_emu)
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index d4a3730b08fa7e..172d6650b9f87e 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <asm/tsc.h>
#include "../mm/slab.h"
static struct kunit_resource resource;
@@ -157,6 +158,248 @@ static void test_kmalloc_redzone_access(struct kunit *test)
kmem_cache_destroy(s);
}
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+/*
+ * On SMP, spin_trylock is sufficient protection.
+ * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
+ */
+#define pcp_trylock_prepare(flags) do { } while (0)
+#define pcp_trylock_finish(flag) do { } while (0)
+#else
+
+/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
+#define pcp_trylock_prepare(flags) local_irq_save(flags)
+#define pcp_trylock_finish(flags) local_irq_restore(flags)
+#endif
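+
+/*
+ * These wrappers are meant to bracket a spin_trylock()-protected section:
+ * prepare -> trylock -> ... -> unlock -> finish, as done in the
+ * pcpu_spin_trylock loop at the end of test_lock_timings() below.
+ */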
+
+/*
+ * Locking a pcp requires a PCP lookup followed by a spinlock. To avoid
+ * a migration causing the wrong PCP to be locked and remote memory being
+ * potentially allocated, pin the task to the CPU for the lookup+lock.
+ * preempt_disable is used on !RT because it is faster than migrate_disable.
+ * migrate_disable is used on RT because otherwise RT spinlock usage is
+ * interfered with and a high priority task cannot preempt the allocator.
+ */
+#ifndef CONFIG_PREEMPT_RT
+#define pcpu_task_pin() preempt_disable()
+#define pcpu_task_unpin() preempt_enable()
+#else
+#define pcpu_task_pin() migrate_disable()
+#define pcpu_task_unpin() migrate_enable()
+#endif
+
+/*
+ * Generic helper to look up a per-cpu variable with an embedded spinlock.
+ * Return value should be used with equivalent unlock helper.
+ */
+#define pcpu_spin_lock(type, member, ptr) \
+({ \
+ type *_ret; \
+ pcpu_task_pin(); \
+ _ret = this_cpu_ptr(ptr); \
+ spin_lock(&_ret->member); \
+ _ret; \
+})
+
+#define pcpu_spin_trylock(type, member, ptr) \
+({ \
+ type *_ret; \
+ pcpu_task_pin(); \
+ _ret = this_cpu_ptr(ptr); \
+ if (!spin_trylock(&_ret->member)) { \
+ pcpu_task_unpin(); \
+ _ret = NULL; \
+ } \
+ _ret; \
+})
+
+#define pcpu_spin_unlock(member, ptr) \
+({ \
+ spin_unlock(&ptr->member); \
+ pcpu_task_unpin(); \
+})
+
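+/*
+ * Counter/pointer pair that can also be read and updated as a single
+ * 128-bit value, so one this_cpu_try_cmpxchg128() can change both fields
+ * at once; presumably modelled on the freelist+tid pair that SLUB's
+ * cmpxchg-based fastpath updates.
+ */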
+typedef union {
+ struct {
+ unsigned long counter;
+ void *dummy;
+ };
+ u128 full;
+} counter_ptr_t;
+
+struct test_pcp {
+ local_lock_t llock;
+ spinlock_t slock;
+ unsigned long counter;
+ counter_ptr_t counter_ptr;
+};
+
+static bool __dummy;
+
+static DEFINE_PER_CPU(struct test_pcp, test_pcps) = {
+ .llock = INIT_LOCAL_LOCK(llock),
+	.slock = __SPIN_LOCK_UNLOCKED(test_pcps.slock),
+};
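+
+/*
+ * Illustrative sketch only, never called by the benchmark: shows the
+ * intended pairing of pcpu_spin_lock() with pcpu_spin_unlock() on the
+ * pointer it returned.  The helper name is made up for this example.
+ */
+static __maybe_unused void test_pcp_locked_inc(void)
+{
+	struct test_pcp *pcp;
+
+	/* Pin the task, look up this CPU's test_pcp and take ->slock. */
+	pcp = pcpu_spin_lock(struct test_pcp, slock, &test_pcps);
+	pcp->counter++;
+	/* Unlock the same pcp that pcpu_spin_lock() returned. */
+	pcpu_spin_unlock(slock, pcp);
+}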
+
+#define TIMING_ITERATIONS 1000000000
+
+static void test_lock_timings(struct kunit *test)
+{
+ unsigned long long before, after;
+ unsigned long __maybe_unused UP_flags;
+ struct test_pcp *pcp;
+ unsigned long flags;
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ if (this_cpu_inc_return(test_pcps.counter) == 0)
+ __dummy = true;
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_inc_return", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ unsigned long old, new;
+ do {
+ old = this_cpu_read(test_pcps.counter);
+ new = old + 1;
+ } while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_try_cmpxchg", after - before);
+
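+	/*
+	 * Same as the loop above, but the speculative read uses
+	 * raw_cpu_read(); presumably this probes whether skipping the
+	 * preemption-safe accessor for the initial read is any cheaper,
+	 * since this_cpu_try_cmpxchg() does the actual update anyway.
+	 */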
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ unsigned long old, new;
+ do {
+ old = raw_cpu_read(test_pcps.counter);
+ new = old + 1;
+ } while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "raw+this_cpu_try_cmpxchg", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ counter_ptr_t old, new;
+ do {
+ struct test_pcp *pcp = raw_cpu_ptr(&test_pcps);
+ old.full = pcp->counter_ptr.full;
+ new.counter = old.counter + 1;
+ new.dummy = old.dummy;
+ } while (!this_cpu_try_cmpxchg128(test_pcps.counter_ptr.full,
+ &old.full, new.full));
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_try_cmpxchg128", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ local_lock(&test_pcps.llock);
+
+ pcp = this_cpu_ptr(&test_pcps);
+
+ pcp->counter++;
+
+ local_unlock(&test_pcps.llock);
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock", after - before);
+
+ before = rdtsc_ordered();
+
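+	/*
+	 * local_lock again, but behind a likely(!in_interrupt()) check,
+	 * presumably to measure the extra cost of such a context check on
+	 * top of the plain local_lock variant above.
+	 */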
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ if (likely(!in_interrupt())) {
+ local_lock(&test_pcps.llock);
+
+ pcp = this_cpu_ptr(&test_pcps);
+
+ pcp->counter++;
+
+ local_unlock(&test_pcps.llock);
+ }
+ }
+
+ after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock+in_intr()", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ local_lock_irq(&test_pcps.llock);
+
+ pcp = this_cpu_ptr(&test_pcps);
+
+ pcp->counter++;
+
+ local_unlock_irq(&test_pcps.llock);
+ }
+
+ after = rdtsc_ordered();
+
+ cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock_irq", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+ local_lock_irqsave(&test_pcps.llock, flags);
+
+ pcp = this_cpu_ptr(&test_pcps);
+
+ pcp->counter++;
+
+ local_unlock_irqrestore(&test_pcps.llock, flags);
+ }
+
+ after = rdtsc_ordered();
+
+ cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock_irqsave", after - before);
+
+ before = rdtsc_ordered();
+
+ for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+
+ pcp_trylock_prepare(UP_flags);
+
+		pcp = pcpu_spin_trylock(struct test_pcp, slock, &test_pcps);
+		/* single-threaded test, so the trylock should not fail */
+		if (!pcp) {
+			pcp_trylock_finish(UP_flags);
+			continue;
+		}
+
+ pcp->counter++;
+
+ pcpu_spin_unlock(slock, pcp);
+ pcp_trylock_finish(UP_flags);
+ }
+
+ after = rdtsc_ordered();
+
+ cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "pcpu_spin_trylock", after - before);
+}
+
static int test_init(struct kunit *test)
{
slab_errors = 0;
@@ -177,6 +420,8 @@ static struct kunit_case test_cases[] = {
KUNIT_CASE(test_clobber_redzone_free),
KUNIT_CASE(test_kmalloc_redzone_access),
+
+ KUNIT_CASE(test_lock_timings),
{}
};