author		Will Deacon <will.deacon@arm.com>	2017-10-16 10:13:48 +0100
committer	Will Deacon <will.deacon@arm.com>	2017-10-16 10:13:48 +0100
commit		658230d9740e71b66a065bc963a39c8633e12b4c (patch)
tree		d91cee58069cf70069dadbba8fb5e05a8174bccd
download	qrwlock-rmem-658230d9740e71b66a065bc963a39c8633e12b4c.tar.gz
First steps towards userspace qrwlock
Signed-off-by: Will Deacon <will.deacon@arm.com>
-rw-r--r--	Makefile	 12
-rw-r--r--	kernel.h	281
-rw-r--r--	main.c		 12
-rw-r--r--	qrwlock.c	 84
-rw-r--r--	qrwlock.h	134
5 files changed, 523 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2c2a6d1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+CC=aarch64-linux-gnu-gcc
+CFLAGS=-Wall -O2 -fno-strict-aliasing
+TARGET=qrwlock
+OBJS= main.o qrwlock.o
+
+$(TARGET) : $(OBJS)
+all: $(TARGET)
+
+clean:
+	rm -f $(TARGET) $(OBJS)
+
+.PHONY: clean
diff --git a/kernel.h b/kernel.h
new file mode 100644
index 0000000..8c061e3
--- /dev/null
+++ b/kernel.h
@@ -0,0 +1,281 @@
+/*
+ * Kernel internals needed by qrwlock for standalone implementation.
+ */
+
+#ifndef __KERNEL_H
+#define __KERNEL_H
+
+#include <stdlib.h>	/* abort() */
+
+/* Compiler bits */
+#define __branch_check__(x, expect, is_constant) ({			\
+			int ______r;					\
+			______r = __builtin_expect(!!(x), expect);	\
+			______r;					\
+		})
+
+#define likely(x)	(__branch_check__(x, 1, __builtin_constant_p(x)))
+#define unlikely(x)	(__branch_check__(x, 0, __builtin_constant_p(x)))
+#define __force
+#define __aligned(x)	__attribute__((aligned(x)))
+
+/* Type definitions */
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+typedef struct {
+	u16 owner;
+	u16 next;
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
+
+typedef struct qrwlock {
+	union {
+		atomic_t cnts;
+		struct {
+			u8 wlocked;	/* Locked for write? */
+			u8 __lstate[3];
+		};
+	};
+	arch_spinlock_t	wait_lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED {			\
+	{ .cnts = ATOMIC_INIT(0), },			\
+	.wait_lock = __ARCH_SPIN_LOCK_UNLOCKED,		\
+}
+
+/*
+ * TODO: We should do something more interesting here, since this affects
+ * the reader slowpath.
+ */
+#define in_interrupt()	0
+
+/* READ_ONCE */
+#define __READ_ONCE_SIZE						\
+({									\
+	switch (size) {							\
+	case 1: *(u8 *)res = *(volatile u8 *)p; break;			\
+	case 2: *(u16 *)res = *(volatile u16 *)p; break;		\
+	case 4: *(u32 *)res = *(volatile u32 *)p; break;		\
+	case 8: *(u64 *)res = *(volatile u64 *)p; break;		\
+	default:							\
+		abort();	/* hack for userspace */		\
+	}								\
+})
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+	__READ_ONCE_SIZE;
+}
+
+#define __READ_ONCE(x)							\
+({									\
+	union { typeof(x) __val; char __c[1]; } __u;			\
+	__read_once_size(&(x), __u.__c, sizeof(x));			\
+	__u.__val;							\
+})
+
+#define READ_ONCE(x) __READ_ONCE(x)
+
+/* cmpxchg */
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl)			\
+static inline unsigned long						\
+__cmpxchg_case_##name(volatile void *ptr,				\
+		      unsigned long old,				\
+		      unsigned long new)				\
+{									\
+	unsigned long tmp, oldval;					\
+									\
+	asm volatile(							\
+	"	prfm	pstl1strm, %[v]\n"				\
+	"1:	ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n"		\
+	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
+	"	cbnz	%" #w "[tmp], 2f\n"				\
+	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
+	"	cbnz	%w[tmp], 1b\n"					\
+	"	" #mb "\n"						\
+	"2:"								\
+	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
+	  [v] "+Q" (*(unsigned long *)ptr)				\
+	: [old] "Lr" (old), [new] "r" (new)				\
+	: cl);								\
+									\
+	return oldval;							\
+}									\
+
+__CMPXCHG_CASE(w, b,     1,  ,  ,  ,         )
+__CMPXCHG_CASE(w, h,     2,  ,  ,  ,         )
+__CMPXCHG_CASE(w,  ,     4,  ,  ,  ,         )
+__CMPXCHG_CASE( ,  ,     8,  ,  ,  ,         )
+__CMPXCHG_CASE(w, b, acq_1,  , a,  , "memory")
+__CMPXCHG_CASE(w, h, acq_2,  , a,  , "memory")
+__CMPXCHG_CASE(w,  , acq_4,  , a,  , "memory")
+__CMPXCHG_CASE( ,  , acq_8,  , a,  , "memory")
+
+#define __CMPXCHG_GEN(sfx)						\
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
+					   unsigned long old,		\
+					   unsigned long new,		\
+					   int size)			\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __cmpxchg_case##sfx##_1(ptr, (u8)old, new);	\
+	case 2:								\
+		return __cmpxchg_case##sfx##_2(ptr, (u16)old, new);	\
+	case 4:								\
+		return __cmpxchg_case##sfx##_4(ptr, old, new);		\
+	case 8:								\
+		return __cmpxchg_case##sfx##_8(ptr, old, new);		\
+	default:							\
+		abort();	/* hack for userspace */		\
+	}								\
+}
+
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+
+#define __cmpxchg_wrapper(sfx, ptr, o, n)				\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__ret = (__typeof__(*(ptr)))					\
+		__cmpxchg##sfx((ptr), (unsigned long)(o),		\
+				(unsigned long)(n), sizeof(*(ptr)));	\
+	__ret;								\
+})
+
+#define cmpxchg_relaxed(...)	__cmpxchg_wrapper( , __VA_ARGS__)
+#define cmpxchg_acquire(...)	__cmpxchg_wrapper(_acq, __VA_ARGS__)
+
+/* load-acquire/store-release */
+#define __smp_store_release(p, v)					\
+do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (__force typeof(*p)) (v) };			\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(u8 *)__u.__c)			\
+				: "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(u16 *)__u.__c)			\
+				: "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(u32 *)__u.__c)			\
+				: "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(u64 *)__u.__c)			\
+				: "memory");				\
+		break;							\
+	}								\
+} while (0)
+#define smp_store_release(p, v) __smp_store_release(p, v)
+
+#define __smp_load_acquire(p)						\
+({									\
+	union { typeof(*p) __val; char __c[1]; } __u;			\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (*(u8 *)__u.__c)				\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (*(u16 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (*(u32 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (*(u64 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	}								\
+	__u.__val;							\
+})
+#define smp_load_acquire(p) __smp_load_acquire(p)
+
+/* smp_cond_load_acquire */
+#define smp_cond_load_acquire(ptr, cond_expr)				\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	typeof(*ptr) VAL;						\
+	for (;;) {							\
+		VAL = smp_load_acquire(__PTR);				\
+		if (cond_expr)						\
+			break;						\
+/* TODO		__cmpwait_relaxed(__PTR, VAL); */			\
+	}								\
+	VAL;								\
+})
+
+/* Atomics */
+#define atomic_read(v)	READ_ONCE((v)->counter)
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	/* TODO */
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	/* TODO */
+}
+
+static inline int atomic_add_return_acquire(int i, atomic_t *v)
+{
+	/* TODO */
+	return i;
+}
+
+static inline int atomic_sub_return_release(int i, atomic_t *v)
+{
+	/* TODO */
+	return i;
+}
+
+#define atomic_cmpxchg_relaxed(v, old, new)				\
+	cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new)				\
+	cmpxchg_acquire(&((v)->counter), (old), (new))
+
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
+/* Ticket spinlock */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	/* TODO */
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	/* TODO */
+}
+
+#endif	/* __KERNEL_H */
diff --git a/main.c b/main.c
new file mode 100644
--- /dev/null
+++ b/main.c
@@ -0,0 +1,12 @@
+#include "qrwlock.h"
+
+struct qrwlock lock;
+
+int main(void)
+{
+	arch_read_lock(&lock);
+	arch_read_unlock(&lock);
+	arch_write_lock(&lock);
+	arch_write_unlock(&lock);
+	return 0;
+}
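Note that kernel.h deliberately leaves the atomics and the ticket spinlock as /* TODO */ stubs, so main.c above compiles and runs but the locks do not yet provide real mutual exclusion under contention. As a point of reference only (not part of this commit), here is a minimal sketch of how those stubs might be filled in using GCC's __atomic builtins rather than the hand-rolled LL/SC assembly used for the cmpxchg cases:

/* Sketch only: userspace implementations of the kernel.h TODO stubs,
 * built on GCC __atomic builtins. Not part of this commit. */

static inline void atomic_add(int i, atomic_t *v)
{
	(void)__atomic_fetch_add(&v->counter, i, __ATOMIC_RELAXED);
}

static inline void atomic_sub(int i, atomic_t *v)
{
	(void)__atomic_fetch_sub(&v->counter, i, __ATOMIC_RELAXED);
}

static inline int atomic_add_return_acquire(int i, atomic_t *v)
{
	return __atomic_add_fetch(&v->counter, i, __ATOMIC_ACQUIRE);
}

static inline int atomic_sub_return_release(int i, atomic_t *v)
{
	return __atomic_sub_fetch(&v->counter, i, __ATOMIC_RELEASE);
}

/* Ticket spinlock: take a ticket, then spin until owner reaches it. */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	u16 ticket = __atomic_fetch_add(&lock->next, 1, __ATOMIC_RELAXED);

	while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != ticket)
		/* spin */;
}

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	u16 owner = __atomic_load_n(&lock->owner, __ATOMIC_RELAXED);

	/* Hand the lock to the next ticket holder. */
	__atomic_store_n(&lock->owner, (u16)(owner + 1), __ATOMIC_RELEASE);
}

The builtins give the required acquire/release orderings directly; matching the kernel's exact AArch64 code generation (LDXR/STXR or LSE) would instead need inline assembly along the lines of __CMPXCHG_CASE above.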
diff --git a/qrwlock.c b/qrwlock.c
new file mode 100644
index 0000000..098c1f1
--- /dev/null
+++ b/qrwlock.c
@@ -0,0 +1,84 @@
+/*
+ * Queued read/write locks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include "qrwlock.h"
+
+/**
+ * queued_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+void queued_read_lock_slowpath(struct qrwlock *lock)
+{
+	/*
+	 * Readers come here when they cannot get the lock without waiting
+	 */
+	if (unlikely(in_interrupt())) {
+		/*
+		 * Readers in interrupt context will get the lock immediately
+		 * if the writer is just waiting (not holding the lock yet),
+		 * so spin with ACQUIRE semantics until the lock is available
+		 * without waiting in the queue.
+		 */
+		atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
+		return;
+	}
+	atomic_sub(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Put the reader into the wait queue
+	 */
+	arch_spin_lock(&lock->wait_lock);
+	atomic_add(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
+	 */
+	atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
+
+	/*
+	 * Signal the next one in queue to become queue head
+	 */
+	arch_spin_unlock(&lock->wait_lock);
+}
+
+/**
+ * queued_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+void queued_write_lock_slowpath(struct qrwlock *lock)
+{
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&lock->wait_lock);
+
+	/* Try to acquire the lock directly if no reader is present */
+	if (!atomic_read(&lock->cnts) &&
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
+		goto unlock;
+
+	/* Set the waiting flag to notify readers that a writer is pending */
+	atomic_add(_QW_WAITING, &lock->cnts);
+
+	/* When no more readers or writers, set the locked flag */
+	do {
+		atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING);
+	} while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING,
+					_QW_LOCKED) != _QW_WAITING);
+unlock:
+	arch_spin_unlock(&lock->wait_lock);
+}
diff --git a/qrwlock.h b/qrwlock.h
new file mode 100644
index 0000000..cda43be
--- /dev/null
+++ b/qrwlock.h
@@ -0,0 +1,134 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QRWLOCK_H
+#define __ASM_GENERIC_QRWLOCK_H
+
+#include "kernel.h"
+
+/*
+ * Writer states & reader shift and bias.
+ */
+#define _QW_WAITING	0x100		/* A writer is waiting */
+#define _QW_LOCKED	0x0ff		/* A writer holds the lock */
+#define _QW_WMASK	0x1ff		/* Writer mask */
+#define _QR_SHIFT	9		/* Reader count shift */
+#define _QR_BIAS	(1U << _QR_SHIFT)
+
+/*
+ * External function declarations
+ */
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_write_lock_slowpath(struct qrwlock *lock);
+
+/**
+ * queued_read_trylock - try to acquire read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queued_read_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (likely(!(cnts & _QW_WMASK))) {
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
+		if (likely(!(cnts & _QW_WMASK)))
+			return 1;
+		atomic_sub(_QR_BIAS, &lock->cnts);
+	}
+	return 0;
+}
+
+/**
+ * queued_write_trylock - try to acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queued_write_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (unlikely(cnts))
+		return 0;
+
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
+}
+/**
+ * queued_read_lock - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queued_read_lock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
+	if (likely(!(cnts & _QW_WMASK)))
+		return;
+
+	/* The slowpath will decrement the reader count, if necessary. */
+	queued_read_lock_slowpath(lock);
+}
+
+/**
+ * queued_write_lock - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_write_lock(struct qrwlock *lock)
+{
+	/* Optimize for the unfair lock case where the fair flag is 0. */
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+		return;
+
+	queued_write_lock_slowpath(lock);
+}
+
+/**
+ * queued_read_unlock - release read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_read_unlock(struct qrwlock *lock)
+{
+	/*
+	 * Atomically decrement the reader count
+	 */
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
+}
+
+/**
+ * queued_write_unlock - release write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_write_unlock(struct qrwlock *lock)
+{
+	smp_store_release(&lock->wlocked, 0);
+}
+
+/*
+ * Remapping rwlock architecture specific functions to the corresponding
+ * queue rwlock functions.
+ */
+#define arch_read_lock(l)	queued_read_lock(l)
+#define arch_write_lock(l)	queued_write_lock(l)
+#define arch_read_trylock(l)	queued_read_trylock(l)
+#define arch_write_trylock(l)	queued_write_trylock(l)
+#define arch_read_unlock(l)	queued_read_unlock(l)
+#define arch_write_unlock(l)	queued_write_unlock(l)
+
+#endif /* __ASM_GENERIC_QRWLOCK_H */
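With the stubs filled in (for instance as sketched after kernel.h above), the single-threaded main.c could grow into a small contended smoke test. The harness below is hypothetical and not part of this commit: the reader/writer split, thread counts, and iteration counts are invented for illustration, and the build needs -pthread added to the Makefile flags.

#include <pthread.h>
#include <stdio.h>
#include "qrwlock.h"

#define NR_READERS	4
#define NR_WRITERS	2
#define NR_LOOPS	100000

static struct qrwlock lock;	/* all-zeroes == __ARCH_RW_LOCK_UNLOCKED */
static unsigned long counter;

static void *reader(void *arg)
{
	for (int i = 0; i < NR_LOOPS; i++) {
		arch_read_lock(&lock);
		(void)counter;		/* read-side critical section */
		arch_read_unlock(&lock);
	}
	return NULL;
}

static void *writer(void *arg)
{
	for (int i = 0; i < NR_LOOPS; i++) {
		arch_write_lock(&lock);
		counter++;		/* write-side critical section */
		arch_write_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t r[NR_READERS], w[NR_WRITERS];

	for (int i = 0; i < NR_READERS; i++)
		pthread_create(&r[i], NULL, reader, NULL);
	for (int i = 0; i < NR_WRITERS; i++)
		pthread_create(&w[i], NULL, writer, NULL);

	for (int i = 0; i < NR_READERS; i++)
		pthread_join(r[i], NULL);
	for (int i = 0; i < NR_WRITERS; i++)
		pthread_join(w[i], NULL);

	/* Lost updates here would indicate broken writer exclusion. */
	printf("counter = %lu (expect %d)\n", counter, NR_WRITERS * NR_LOOPS);
	return 0;
}

A correct lock always yields counter == NR_WRITERS * NR_LOOPS; with the TODO stubs still in place, the writers race and updates are lost.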