| author | Vlastimil Babka <vbabka@suse.cz> | 2024-02-12 21:36:33 +0100 |
|---|---|---|
| committer | Vlastimil Babka <vbabka@suse.cz> | 2024-02-12 21:36:33 +0100 |
| commit | e4d39d948cd5cea8989768ec08d7fdbd5171e918 (patch) | |
| tree | decd73ccd5e12043e239fa8179a42451689335ae | |
| parent | 41bccc98fb7931d63d03f326a746ac4d429c1dd3 (diff) | |
| download | linux-BROKEN-locking-bench.tar.gz | |
slub_kunit: hack in a stupid locking/atomics microbenchmark (branch: BROKEN-locking-bench)
| -rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 2 |
| -rw-r--r-- | lib/slub_kunit.c | 245 |

2 files changed, 247 insertions, 0 deletions
```diff
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 6962df3157938d..d4ce28a2d8f9fb 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -52,3 +52,5 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	RET
 
 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+
+EXPORT_SYMBOL(this_cpu_cmpxchg16b_emu)
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index d4a3730b08fa7e..172d6650b9f87e 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -5,6 +5,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <asm/tsc.h>
 #include "../mm/slab.h"
 
 static struct kunit_resource resource;
@@ -157,6 +158,248 @@ static void test_kmalloc_redzone_access(struct kunit *test)
 	kmem_cache_destroy(s);
 }
 
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+/*
+ * On SMP, spin_trylock is sufficient protection.
+ * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
+ */
+#define pcp_trylock_prepare(flags)	do { } while (0)
+#define pcp_trylock_finish(flag)	do { } while (0)
+#else
+
+/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
+#define pcp_trylock_prepare(flags)	local_irq_save(flags)
+#define pcp_trylock_finish(flags)	local_irq_restore(flags)
+#endif
+
+/*
+ * Locking a pcp requires a PCP lookup followed by a spinlock. To avoid
+ * a migration causing the wrong PCP to be locked and remote memory being
+ * potentially allocated, pin the task to the CPU for the lookup+lock.
+ * preempt_disable is used on !RT because it is faster than migrate_disable.
+ * migrate_disable is used on RT because otherwise RT spinlock usage is
+ * interfered with and a high priority task cannot preempt the allocator.
+ */
+#ifndef CONFIG_PREEMPT_RT
+#define pcpu_task_pin()		preempt_disable()
+#define pcpu_task_unpin()	preempt_enable()
+#else
+#define pcpu_task_pin()		migrate_disable()
+#define pcpu_task_unpin()	migrate_enable()
+#endif
+
+/*
+ * Generic helper to look up and lock a per-cpu variable with an embedded
+ * spinlock. Return value should be used with the equivalent unlock helper.
+ */
+#define pcpu_spin_lock(type, member, ptr)				\
+({									\
+	type *_ret;							\
+	pcpu_task_pin();						\
+	_ret = this_cpu_ptr(ptr);					\
+	spin_lock(&_ret->member);					\
+	_ret;								\
+})
+
+#define pcpu_spin_trylock(type, member, ptr)				\
+({									\
+	type *_ret;							\
+	pcpu_task_pin();						\
+	_ret = this_cpu_ptr(ptr);					\
+	if (!spin_trylock(&_ret->member)) {				\
+		pcpu_task_unpin();					\
+		_ret = NULL;						\
+	}								\
+	_ret;								\
+})
+
+#define pcpu_spin_unlock(member, ptr)					\
+({									\
+	spin_unlock(&ptr->member);					\
+	pcpu_task_unpin();						\
+})
+
+typedef union {
+	struct {
+		unsigned long counter;
+		void *dummy;
+	};
+	u128 full;
+} counter_ptr_t;
+
+struct test_pcp {
+	local_lock_t llock;
+	spinlock_t slock;
+	unsigned long counter;
+	counter_ptr_t counter_ptr;
+};
+
+static bool __dummy;
+
+static DEFINE_PER_CPU(struct test_pcp, test_pcps) = {
+	.llock = INIT_LOCAL_LOCK(llock),
+	.slock = __SPIN_LOCK_UNLOCKED(stock_lock),
+};
+
+#define TIMING_ITERATIONS 1000000000
+
+static void test_lock_timings(struct kunit *test)
+{
+	unsigned long long before, after;
+	unsigned long __maybe_unused UP_flags;
+	struct test_pcp *pcp;
+	unsigned long flags;
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		if (this_cpu_inc_return(test_pcps.counter) == 0)
+			__dummy = true;
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_inc_return", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		unsigned long old, new;
+		do {
+			old = this_cpu_read(test_pcps.counter);
+			new = old + 1;
+		} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_try_cmpxchg", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		unsigned long old, new;
+		do {
+			old = raw_cpu_read(test_pcps.counter);
+			new = old + 1;
+		} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "raw+this_cpu_try_cmpxchg", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		counter_ptr_t old, new;
+		do {
+			struct test_pcp *pcp = raw_cpu_ptr(&test_pcps);
+			old.full = pcp->counter_ptr.full;
+			new.counter = old.counter + 1;
+			new.dummy = old.dummy;
+		} while (!this_cpu_try_cmpxchg128(test_pcps.counter_ptr.full,
+						  &old.full, new.full));
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "this_cpu_try_cmpxchg128", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock(&test_pcps.llock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock(&test_pcps.llock);
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		if (likely(!in_interrupt())) {
+			local_lock(&test_pcps.llock);
+
+			pcp = this_cpu_ptr(&test_pcps);
+
+			pcp->counter++;
+
+			local_unlock(&test_pcps.llock);
+		}
+	}
+
+	after = rdtsc_ordered();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock+in_intr()", after - before);
+
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock_irq(&test_pcps.llock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock_irq(&test_pcps.llock);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock_irq", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock_irqsave(&test_pcps.llock, flags);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock_irqrestore(&test_pcps.llock, flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "local_lock_irqsave", after - before);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+
+		pcp_trylock_prepare(UP_flags);
+
+		pcp = pcpu_spin_trylock(struct test_pcp, slock, &test_pcps);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		pcpu_spin_unlock(slock, pcp);
+		pcp_trylock_finish(UP_flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+
+	pr_info("%-25s %12llu cycles\n", "pcpu_spin_trylock", after - before);
+}
+
 static int test_init(struct kunit *test)
 {
 	slab_errors = 0;
@@ -177,6 +420,8 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_clobber_redzone_free),
 	KUNIT_CASE(test_kmalloc_redzone_access),
+
+	KUNIT_CASE(test_lock_timings),
 	{}
 };
```