diff options
author | Jean-Philippe Brucker <jean-philippe.brucker@arm.com> | 2015-12-10 19:24:59 +0000 |
---|---|---|
committer | Mark Rutland <mark.rutland@arm.com> | 2016-06-15 10:27:35 +0100 |
commit | 50eb940341fe22e2a3e36d9d8d06ad40b8b961f0 (patch) | |
tree | 6e30e03142b34331126aa307076309ce54558efe | |
parent | 5714dbb8b62ac25f336e5256dcc2fd8fe7c37b1b (diff) | |
download | boot-wrapper-aarch64-50eb940341fe22e2a3e36d9d8d06ad40b8b961f0.tar.gz |
Replace exclusive accesses with a bakery lock
The commit prepares the removal of the MMU identity map, which was only
used for exclusive accesses (ldxr/stxr) in psci_cpu_on.
Instead of relying on exclusives, we assume that when stage-1
translation is disabled, all EL3 memory accesses have Device type, with
non-gathering and non-reordering attributes. This guarantees single-copy
atomicity of aligned halfword accesses, as per the ARM ARM.
We can thus switch to a less constrained (albeit bulkier) locking
mechanism. This patch implements Lamport's bakery lock, which doesn't
rely on atomic compare-and-swap primitives.
For bisectability, we remove the call to switch_to_idmap here.
Otherwise, the assertions made by the bakery lock code, regarding order
of accesses, are invalid. The rest of the code will be removed in a
subsequent patch.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | arch/aarch64/include/asm/psci.h | 40 | ||||
-rw-r--r-- | arch/aarch64/psci.S | 1 | ||||
-rw-r--r-- | bakery_lock.c | 137 | ||||
-rw-r--r-- | include/bakery_lock.h | 48 | ||||
-rw-r--r-- | include/compiler.h | 1 | ||||
-rw-r--r-- | include/cpu.h | 1 | ||||
-rw-r--r-- | include/psci.h | 2 | ||||
-rw-r--r-- | psci.c | 20 |
9 files changed, 204 insertions, 48 deletions
diff --git a/Makefile.am b/Makefile.am index 70dedc2..fb42c45 100644 --- a/Makefile.am +++ b/Makefile.am @@ -92,7 +92,7 @@ CFLAGS += -Wall -fomit-frame-pointer CFLAGS += -ffunction-sections -fdata-sections LDFLAGS += --gc-sections -OFILES += boot_common.o ns.o $(GIC) cache.o lib.o +OFILES += boot_common.o bakery_lock.o ns.o $(GIC) cache.o lib.o OFILES += $(addprefix $(ARCH_SRC),boot.o stack.o mmu.o $(BOOTMETHOD) utils.o) all: $(IMAGE) diff --git a/arch/aarch64/include/asm/psci.h b/arch/aarch64/include/asm/psci.h deleted file mode 100644 index ddab76c..0000000 --- a/arch/aarch64/include/asm/psci.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * arch/aarch64/include/asm/psci.h - * - * Copyright (C) 2015 ARM Limited. All rights reserved. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE.txt file. - */ -#ifndef __ASM_AARCH64_PSCI_H -#define __ASM_AARCH64_PSCI_H - -#ifndef __ASSEMBLY__ - -static inline int psci_store_address(unsigned long address, - unsigned long *branch_entry) -{ - unsigned long ret; - - asm volatile ( - "1:\n" - "ldxr %0, [%2]\n" - "subs %0, %0, %3\n" - "b.ne 2f\n" - "stxr %w0, %1, [%2]\n" - "cbnz %w0, 1b\n" - "2:\n" - : "=&r" (ret) - : "r" (address), "r" (branch_entry), "J" (PSCI_ADDR_INVALID) - : "cc"); - - if (ret != 0) - /* ret value comes from subs */ - return PSCI_RET_ALREADY_ON; - - return PSCI_RET_SUCCESS; -} - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/aarch64/psci.S b/arch/aarch64/psci.S index b8d07e5..5f7e2d7 100644 --- a/arch/aarch64/psci.S +++ b/arch/aarch64/psci.S @@ -102,7 +102,6 @@ smc_exit: start_el3: ldr x0, =vector bl setup_vector - bl switch_to_idmap /* only boot the primary cpu (entry 0 in the table) */ cpuid x0, x1 diff --git a/bakery_lock.c b/bakery_lock.c new file mode 100644 index 0000000..877118c --- /dev/null +++ b/bakery_lock.c @@ -0,0 +1,137 @@ +/* + * bakery_lock.c - Lamport's bakery algorithm + * + * Copyright (C) 2015 ARM Limited. All rights reserved. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE.txt file. + * + * + * Simplest implementation of Lamport's bakery lock [1]. Applies only to device + * memory with attributes non-gathering and non-reordering. + * + * This algorithm's strength resides in the fact that it doesn't rely on + * hardware synchronisation mechanisms and as such, doesn't require normal + * cacheable memory on ARMv8. CPUs write only to their own memory locations, + * and read from all other CPUs' ones, in order to decide whose turn it is to + * have the lock. + * + * The algorithm correctness is based on the following assumptions: + * + * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically. + * In other words, simultaneous read and write to choosing[k] do not occur. + * In this implementation, it is guaranteed by single-copy atomicity, for + * accesses of type Device with non-gathering attributes. The algorithm + * doesn't require accesses to number[k] to be atomic, even though this + * implementation guarantees that as well. + * + * 2) Storage of number[k] allows it to become large enough for practical use of + * the lock. Indeed, if the lock is contended all of the time, the value of + * max(number[1..N]) will keep increasing, and this algorithm doesn't handle + * wrapping of the ticket number. In this implementation, we assume that we + * will never reach 32766 (0x7fff) overlapping calls to bakery_lock. + * + * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem" + */ + +#include <bakery_lock.h> +#include <cpu.h> + +/* + * Return the result of (number_a, cpu_a) < (number_b, cpu_b) + */ +static unsigned int less_than(unsigned long cpu_a, unsigned long number_a, + unsigned long cpu_b, unsigned long number_b) +{ + if (number_a == number_b) + return cpu_a < cpu_b; + + return number_a < number_b; +} + +static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self) +{ + int cpu; + unsigned int max_number = 0; + bakery_ticket_t ticket; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu == self) + continue; + + ticket = read_ticket_once(tickets[cpu]); + + if (max_number < ticket.number) + max_number = ticket.number; + } + + return 1 + max_number; +} + +/** + * Wait for our turn to enter a critical section + * + * @tickets: array of size NR_CPUS, indexed by logical IDs. + * @self: logical ID of the current CPU + * + * Note: since this implementation assumes that all loads and stores to tickets + * are of Device type with non-gathering and non-reordering attributes, we + * expect all of them to be performed, in program order. As a result, the + * following function is pretty relaxed in terms of barriers: we only + * synchronize before sev(), and introduce system-wide memory barriers around + * the critical section. + */ +void bakery_lock(bakery_ticket_t *tickets, unsigned self) +{ + int cpu, number_self; + bakery_ticket_t ticket; + + /* Doorway */ + write_ticket_once(tickets[self], 1, 0); + number_self = choose_number(tickets, self); + write_ticket_once(tickets[self], 0, number_self); + + dsb(st); + sev(); + + /* Bakery */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + uint16_t number_cpu; + + if (cpu == self) + continue; + + ticket = read_ticket_once(tickets[cpu]); + while (ticket.choosing) { + wfe(); + ticket = read_ticket_once(tickets[cpu]); + } + + number_cpu = ticket.number; + + /* + * Wait until that CPU updates its ticket. We only need to do + * the comparison once, since any update to tickets[cpu].number + * will be to a value greater than ours, or zero. + */ + if (number_cpu != 0 && less_than(cpu, number_cpu, + self, number_self)) { + do { + wfe(); + ticket = read_ticket_once(tickets[cpu]); + } while (number_cpu == ticket.number); + } + } + + dmb(sy); +} + +void bakery_unlock(bakery_ticket_t *tickets, unsigned self) +{ + dmb(sy); + + write_ticket_once(tickets[self], 0, 0); + + dsb(st); + sev(); +} diff --git a/include/bakery_lock.h b/include/bakery_lock.h new file mode 100644 index 0000000..ebeb893 --- /dev/null +++ b/include/bakery_lock.h @@ -0,0 +1,48 @@ +/* + * include/bakery_lock.h + * + * Copyright (C) 2015 ARM Limited. All rights reserved. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE.txt file. + */ + +#ifndef __BAKERY_LOCK_H +#define __BAKERY_LOCK_H + +#include <stdint.h> + +#include <compiler.h> + +/* + * We *must* access this structure with 16 or 8 bit accesses, aligned on 16-bit. + * Helpers read/write_ticket_once should be used for this. + */ +typedef union { + struct __packed { + uint16_t number : 15; + uint16_t choosing : 1; + }; + uint16_t __val; +} bakery_ticket_t; + +#define write_ticket_once(ticket, choosing_, number_) \ +({ \ + bakery_ticket_t __t = { \ + .number = (number_), \ + .choosing = (choosing_), \ + }; \ + *(volatile uint16_t *)&(ticket).__val = __t.__val; \ +}) + +#define read_ticket_once(ticket) \ +({ \ + bakery_ticket_t __t; \ + __t.__val = *(volatile uint16_t *)&(ticket).__val; \ + __t; \ +}) + +void bakery_lock(bakery_ticket_t *tickets, unsigned self); +void bakery_unlock(bakery_ticket_t *tickets, unsigned self); + +#endif diff --git a/include/compiler.h b/include/compiler.h index f9ee2f3..9d7a92a 100644 --- a/include/compiler.h +++ b/include/compiler.h @@ -14,5 +14,6 @@ #define unreachable() __builtin_unreachable() #define __noreturn __attribute__((noreturn)) +#define __packed __attribute__((packed)) #endif diff --git a/include/cpu.h b/include/cpu.h index b4dae94..704d6d8 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -16,6 +16,7 @@ #ifndef __ASSEMBLY__ #define isb() asm volatile ("isb\n" : : : "memory") +#define dmb(arg) asm volatile ("dmb " #arg "\n" : : : "memory") #define dsb(arg) asm volatile ("dsb " #arg "\n" : : : "memory") #define sev() asm volatile ("sev\n" : : : "memory") #define wfe() asm volatile ("wfe\n" : : : "memory") diff --git a/include/psci.h b/include/psci.h index a2caa59..21ca5ee 100644 --- a/include/psci.h +++ b/include/psci.h @@ -24,6 +24,4 @@ #define PSCI_ADDR_INVALID (-1) -#include <asm/psci.h> - #endif @@ -9,6 +9,7 @@ #include <stdint.h> +#include <bakery_lock.h> #include <cpu.h> #include <psci.h> #include <spin.h> @@ -19,18 +20,29 @@ static unsigned long branch_table[NR_CPUS]; +bakery_ticket_t branch_table_lock[NR_CPUS]; + +static int psci_store_address(unsigned int cpu, unsigned long address) +{ + if (branch_table[cpu] != PSCI_ADDR_INVALID) + return PSCI_RET_ALREADY_ON; + + branch_table[cpu] = address; + return PSCI_RET_SUCCESS; +} + int psci_cpu_on(unsigned long target_mpidr, unsigned long address) { int ret; unsigned int cpu = find_logical_id(target_mpidr); + unsigned int this_cpu = find_logical_id(read_mpidr()); if (cpu == MPIDR_INVALID) return PSCI_RET_INVALID_PARAMETERS; - ret = psci_store_address(address, branch_table + cpu); - - dsb(ishst); - sev(); + bakery_lock(branch_table_lock, this_cpu); + ret = psci_store_address(cpu, address); + bakery_unlock(branch_table_lock, this_cpu); return ret; } |