aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Rutland <mark.rutland@arm.com>2013-02-11 16:58:18 +0000
committerMark Rutland <mark.rutland@arm.com>2013-06-05 15:58:12 +0100
commitd9e63728ff454f338e41edd1433b7e5096120279 (patch)
treef00e5d1405f3a2f8b87e6ec4bac295ffb3ed5c5a
parent28ec269a22c8dc141f49a693aea389af88424b0c (diff)
downloadboot-wrapper-aarch64-d9e63728ff454f338e41edd1433b7e5096120279.tar.gz
Add simple PSCI implementation
This patch adds a simple PSCI implementation, only supporting CPU_ON and CPU_OFF. As this does not communicate with any hardware power controller (yet), CPUs spin in an internal pen, with a wfe to limit their polling speed. While the model brings up CPUs with caches invalidated, we enable caches and the MMU to allow the use of exclusive operations in the bootwrapper, and thus the cache may allocate entries while in EL3. As PSCI requires that caches are invalid when executing from a CPU_ON entry point, the caches must be cleaned and invalidated when we drop to EL2. This cleaning is performed in a shim in EL2 as this is simpler than enabling/disabling caches and the MMU on each SMC. The list of all CPU IDs (MPIDRs with non-aff bits masked out) in the system must be provided in the Makefile as the comma-separated list CPU_IDS, to enable the bootwrapper to differentiate CPUs and provide the correct error messages if, for example, the OS attempts to power on a CPU multiple times. If this list does not match the CPUs present, it may not be possible to bring some CPUs online, and the PSCI implementation may erroneously acknowledge power on requests for non-existent CPUs. Signed-off-by: Mark Rutland <mark.rutland@arm.com>
-rw-r--r--Makefile8
-rw-r--r--mmu.S146
-rw-r--r--model.lds.S2
-rw-r--r--psci.S262
4 files changed, 415 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 50dca3b..18bc910 100644
--- a/Makefile
+++ b/Makefile
@@ -13,8 +13,10 @@ SYSREGS_BASE := 0x1c010000
GIC_DIST_BASE := 0x2c001000
GIC_CPU_BASE := 0x2c002000
CNTFRQ := 0x01800000 # 24Mhz
+CPU_IDS ?= 0x0,0x1,0x2,0x3
DEFINES += -DCNTFRQ=$(CNTFRQ)
+DEFINES += -DCPU_IDS=$(CPU_IDS)
DEFINES += -DGIC_CPU_BASE=$(GIC_CPU_BASE)
DEFINES += -DGIC_DIST_BASE=$(GIC_DIST_BASE)
DEFINES += -DSYSREGS_BASE=$(SYSREGS_BASE)
@@ -24,7 +26,7 @@ DEFINES += -DUART_BASE=$(UART_BASE)
CPPFLAGS += $(INITRD_FLAGS)
BOOTLOADER := boot.S
-BOOTMETHOD := spin.o
+BOOTMETHOD := psci.o
MBOX_OFFSET := 0xfff8
KERNEL := Image
KERNEL_OFFSET := 0x80000
@@ -66,9 +68,9 @@ DTC := $(if $(wildcard ./dtc), ./dtc, $(shell which dtc))
all: $(IMAGE)
clean:
- rm -f $(IMAGE) boot.o cache.o gic.o ns.o $(BOOTMETHOD) model.lds fdt.dtb
+ rm -f $(IMAGE) boot.o cache.o gic.o mmu.o ns.o $(BOOTMETHOD) model.lds fdt.dtb
-$(IMAGE): boot.o cache.o gic.o ns.o $(BOOTMETHOD) model.lds fdt.dtb $(KERNEL) $(FILESYSTEM)
+$(IMAGE): boot.o cache.o gic.o mmu.o ns.o $(BOOTMETHOD) model.lds fdt.dtb $(KERNEL) $(FILESYSTEM)
$(LD) -o $@ --script=model.lds
%.o: %.S Makefile
diff --git a/mmu.S b/mmu.S
new file mode 100644
index 0000000..5468b5e
--- /dev/null
+++ b/mmu.S
@@ -0,0 +1,146 @@
+/*
+ * mmu.S - EL3 MMU identity map code to enable the use of exclusives.
+ *
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ */
+
+/*
+ * Upper and lower attribute fields of a block descriptor, plus the
+ * BLOCK_MEM_1GB()/BLOCK_DEV_1GB() helpers that combine them with a 1GB-aligned
+ * output address into a complete level 1 block entry.
+ */
+#define ATTR_UPPER_XN (1 << 54) /* Non-Executable */
+#define ATTR_UPPER_PXN (1 << 53) /* Privileged Non-Executable */
+
+/* Memory blocks stay executable; device blocks are marked XN and PXN. */
+#define MEM_ATTR_UPPER 0
+#define DEV_ATTR_UPPER (ATTR_UPPER_XN | ATTR_UPPER_PXN)
+
+#define ATTR_LOWER_AF (1 << 10) /* Don't trap accesses */
+#define ATTR_LOWER_SH_IS (3 << 8) /* Inner shareable */
+#define ATTR_LOWER_SH_NS (0 << 8) /* Non-shareable */
+
+/* Defined for completeness; not used by either *_ATTR_LOWER below. */
+#define ATTR_LOWER_AP_RW_ANY (1 << 6) /* Writeable at any privilege level */
+
+#define ATTR_LOWER_NS (1 << 5) /* non-secure PA */
+#define ATTR_LOWER_ATTRINDEX(n) ((n) << 2) /* MAIR_EL3 attrN */
+
+/* Normal memory: inner shareable, MAIR_EL3 attr0. */
+#define MEM_ATTR_LOWER (ATTR_LOWER_AF | ATTR_LOWER_SH_IS | \
+ ATTR_LOWER_NS | ATTR_LOWER_ATTRINDEX(0))
+
+/* Device memory: non-shareable field value, MAIR_EL3 attr1. */
+#define DEV_ATTR_LOWER (ATTR_LOWER_AF | ATTR_LOWER_SH_NS | \
+ ATTR_LOWER_NS | ATTR_LOWER_ATTRINDEX(1))
+
+#define BLOCK_VALID (1 << 0) /* Valid block entry */
+
+/*
+ * the top 10 bits of PA [39:30]
+ */
+#define BLOCK_1GB_PA(_pa) ((_pa) & (0x3ff << 30))
+
+/* Level 1 block descriptor mapping the 1GB region containing _pa as memory. */
+#define BLOCK_MEM_1GB(_pa) (MEM_ATTR_UPPER | BLOCK_1GB_PA(_pa) | \
+ MEM_ATTR_LOWER | BLOCK_VALID)
+
+/* Level 1 block descriptor mapping the 1GB region containing _pa as device. */
+#define BLOCK_DEV_1GB(_pa) (DEV_ATTR_UPPER | BLOCK_1GB_PA(_pa) | \
+ DEV_ATTR_LOWER | BLOCK_VALID)
+
+ .data
+
+/* An all-zero descriptor: bit 0 (BLOCK_VALID) clear, so the entry is invalid. */
+#define BLOCK_INVALID (0 << 0)
+
+ /*
+ * 1st level page table.
+ * 512 entries, each covering 1GB.
+ *
+ * Entry N describes addresses [N << 30, (N + 1) << 30). Every valid entry
+ * is an identity mapping (output address == input address). The table is
+ * aligned to 4KB (.align 12).
+ */
+ .align 12
+pgtable_l1:
+ .quad BLOCK_DEV_1GB(0x00000000) /* entry 0: device */
+ .quad BLOCK_INVALID /* entry 1: unmapped */
+ .quad BLOCK_MEM_1GB(0x80000000) /* entry 2: memory */
+ .quad BLOCK_MEM_1GB(0xC0000000) /* entry 3: memory */
+ .rept 30 /* entries 4-33: unmapped */
+ .quad BLOCK_INVALID
+ .endr
+ .quad BLOCK_MEM_1GB(0x880000000) /* entry 34: memory */
+ .quad BLOCK_MEM_1GB(0x8C0000000) /* entry 35: memory */
+ .rept (512-36) /* entries 36-511: unmapped */
+ .quad BLOCK_INVALID
+ .endr
+
+/*
+ * Register values programmed by switch_to_idmap.
+ *
+ * MAIR_EL3:
+ * attr0: Normal memory, outer non-cacheable, inner write-through non-transient
+ * attr1..attr7: 0x00, device-nGnRnE
+ */
+#define MAIR_ATTR 0x48
+
+#define TCR_RES1 ((1 << 31) | (1 << 23))
+#define TCR_PS (2 << 16) /* 40 bits */
+#define TCR_TG0 (0 << 14) /* 4KB */
+#define TCR_SH0 (3 << 12) /* inner shareable */
+#define TCR_ORGN0 (0 << 10) /* normal outer non-cacheable */
+#define TCR_IRGN0 (2 << 8) /* normal inner write-through */
+#define TCR_T0SZ (25 << 0) /* 39-bit input address range: 2^(64-25) bytes */
+
+#define TCR_VAL (TCR_RES1 | TCR_PS | TCR_TG0 | TCR_SH0 | TCR_ORGN0 | TCR_IRGN0 | TCR_T0SZ)
+
+/*
+ * NOTE: the SCTLR_* macros below are the field *values* chosen for SCTLR_VAL,
+ * not bit masks. A feature that is left disabled is written as (0 << n) and
+ * therefore evaluates to 0; such a macro cannot be used to test or clear the
+ * corresponding bit.
+ */
+#define SCTLR_RES1 ((3 << 28) | (3 << 22) | (1 << 18) | (1 << 16) | (1 << 11) | (3 << 4))
+#define SCTLR_EE (0 << 25) /* little endian */
+#define SCTLR_WXN (0 << 19) /* regions with write permission not forced to XN */
+#define SCTLR_I (0 << 12) /* Disable I cache */
+#define SCTLR_SA (0 << 3) /* No stack alignment checking */
+#define SCTLR_C (0 << 2) /* Disable caches */
+#define SCTLR_A (0 << 1) /* No alignment checking */
+#define SCTLR_M (1 << 0) /* enable MMU */
+
+#define SCTLR_VAL (SCTLR_RES1 | SCTLR_EE | SCTLR_WXN | SCTLR_I | \
+ SCTLR_SA | SCTLR_C | SCTLR_A | SCTLR_M)
+
+ .text
+
+ .globl switch_to_idmap
+ .globl switch_to_physmap
+
+/*
+ * switch_to_idmap - enable the EL3 MMU using the identity map in pgtable_l1.
+ *
+ * Invalidates the instruction cache and the EL3 TLB entries, then programs
+ * TTBR0_EL3, MAIR_EL3 and TCR_EL3 before writing SCTLR_VAL to SCTLR_EL3.
+ * Must be entered with bl: the return address is copied from x30 to x28 and
+ * the routine returns through x28.
+ *
+ * Clobbers x0, x28.
+ */
+switch_to_idmap:
+
+ mov x28, x30
+
+ /*
+ * We assume that the d-caches are invalid at power-on, and hence do
+ * not need to be invalidated. However the icache(s) and TLBs may still
+ * be filled with garbage.
+ */
+ ic iallu
+ tlbi alle3
+ dsb sy
+ isb
+
+ /* Translation table base: pgtable_l1 is 4KB aligned (.align 12). */
+ adr x0, pgtable_l1
+ msr ttbr0_el3, x0
+
+ ldr x0, =MAIR_ATTR
+ msr mair_el3, x0
+
+ ldr x0, =TCR_VAL
+ msr tcr_el3, x0
+
+ /* Translation registers must be in place before the MMU is switched on. */
+ isb
+
+ /* SCTLR_VAL has the M bit set, so this write turns the MMU on. */
+ ldr x0, =SCTLR_VAL
+ msr sctlr_el3, x0
+
+ isb
+
+ /* Identity map now active, branch back to phys/virt address */
+ ret x28
+
+/*
+ * switch_to_physmap - turn the EL3 MMU and data cache enables off again.
+ *
+ * Clears the M (bit 0) and C (bit 2) bits of SCTLR_EL3 and then calls
+ * flush_caches. Must be entered with bl: the return address is kept in x28
+ * because the nested bl overwrites x30.
+ *
+ * Clobbers x0, x1, x28, plus whatever flush_caches clobbers.
+ */
+switch_to_physmap:
+ mov x28, x30
+
+ /*
+ * SCTLR_M and SCTLR_C are the field values used to build SCTLR_VAL, and
+ * SCTLR_C is (0 << 2), so (SCTLR_M | SCTLR_C) only covers bit 0. Build
+ * the mask from the bit positions so that the C bit really is cleared.
+ */
+ mrs x0, sctlr_el3
+ mov x1, #((1 << 2) | (1 << 0))
+ bic x0, x0, x1
+ msr sctlr_el3, x0
+
+ isb
+
+ bl flush_caches
+
+ ret x28
+
diff --git a/model.lds.S b/model.lds.S
index 53d0f7f..401ea5f 100644
--- a/model.lds.S
+++ b/model.lds.S
@@ -14,6 +14,7 @@ TARGET(binary)
INPUT(./boot.o)
INPUT(./cache.o)
INPUT(./gic.o)
+INPUT(./mmu.o)
INPUT(./ns.o)
INPUT(./BOOTMETHOD)
INPUT(KERNEL)
@@ -29,6 +30,7 @@ SECTIONS
.text : { boot.o }
.text : { cache.o }
.text : { gic.o }
+ .text : { mmu.o }
.text : { ns.o }
.text : { BOOTMETHOD }
. = PHYS_OFFSET + MBOX_OFFSET;
diff --git a/psci.S b/psci.S
new file mode 100644
index 0000000..5f59e2a
--- /dev/null
+++ b/psci.S
@@ -0,0 +1,262 @@
+/*
+ * psci.S - basic PSCI implementation
+ *
+ * Copyright (C) 2013 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ */
+#include "common.S"
+
+/*
+ * Function IDs accepted in x0 by psci_call64.
+ * NOTE(review): presumably these must match the IDs advertised to the OS
+ * (e.g. in the device tree) - confirm against the dts.
+ */
+#define PSCI_CPU_OFF 0x84000001
+#define PSCI_CPU_ON 0x84000002
+
+/* Values returned to the caller in x0. */
+#define PSCI_RET_SUCCESS 0
+#define PSCI_RET_NOT_IMPL (-1)
+#define PSCI_RET_INVALID (-2)
+#define PSCI_RET_DENIED (-3)
+
+/* CPU_IDS is supplied by the Makefile as a comma-separated list (-DCPU_IDS=...). */
+#ifndef CPU_IDS
+#error No CPU MPIDRs provided.
+#endif
+
+/* Sentinels: end-of-list marker for id_table, and "no entry point" for branch_table. */
+#define MPIDR_INVALID (-1)
+#define ADDR_INVALID (-1)
+
+ /*
+ * Emit one exception vector slot: align to 2^7 = 128 bytes and branch to
+ * the handler \label.
+ */
+ .macro ventry label
+ .align 7
+ b \label
+ .endm
+
+ .data
+
+ /*
+ * EL3 exception vector table, installed in VBAR_EL3 by setup_vector.
+ * Aligned to 2^11 = 2KB; each ventry slot is 128 bytes apart. Only the
+ * synchronous exceptions taken from a lower EL are handled (as PSCI calls);
+ * every other vector spins in err_exception.
+ *
+ * NOTE(review): the table contains instructions but is emitted into .data.
+ */
+ .align 11
+vector:
+ // current EL, SP_EL0
+ ventry err_exception // synchronous
+ ventry err_exception // IRQ
+ ventry err_exception // FIQ
+ ventry err_exception // SError
+
+ // current EL, SP_ELx
+ ventry err_exception // synchronous
+ ventry err_exception // IRQ
+ ventry err_exception // FIQ
+ ventry err_exception // SError
+
+ // lower EL, AArch64
+ ventry psci_call64 // synchronous
+ ventry err_exception // IRQ
+ ventry err_exception // FIQ
+ ventry err_exception // SError
+
+ // lower EL, AArch32
+ ventry psci_call32 // synchronous
+ ventry err_exception // IRQ
+ ventry err_exception // FIQ
+ ventry err_exception // SError
+
+ /*
+ * Array of the CPU ID (MPIDR & MPIDR_ID_BITS) of each CPU in the system.
+ * The index into the array is used as a logical id, and an index into
+ * the branch table. The branch table is automatically padded to the
+ * same size as the id table.
+ *
+ * The first CPU in the table is considered to be the primary CPU, and
+ * is the only CPU to immediately branch off to the kernel.
+ *
+ * The MPIDR_INVALID quad at __id_end follows the last real entry; it is
+ * not counted in nr_cpus, so valid logical ids are 0 .. nr_cpus - 1.
+ */
+ .align 3
+id_table:
+ .quad CPU_IDS
+__id_end:
+ .quad MPIDR_INVALID
+
+/* Number of 8-byte entries between id_table and __id_end. */
+.equ nr_cpus, ((__id_end - id_table) / 8)
+
+/*
+ * One entry point per logical id. ADDR_INVALID means "no address posted";
+ * spin polls a CPU's slot until some other value appears.
+ */
+branch_table:
+ .rept (nr_cpus)
+ .quad ADDR_INVALID
+ .endr
+
+ .text
+
+ .globl start_no_el3
+ .globl start_el3
+
+/* Handler for every unexpected exception: spin forever. */
+err_exception:
+ b err_exception
+
+/*
+ * Synchronous exception from a lower EL running AArch32: no calls are
+ * supported, so return PSCI_RET_NOT_IMPL in w0.
+ */
+psci_call32:
+ mov w0, PSCI_RET_NOT_IMPL
+ eret
+
+/*
+ * Synchronous exception from a lower EL running AArch64.
+ *
+ * x0 - function id; dispatches to psci_cpu_off / psci_cpu_on, otherwise
+ * returns PSCI_RET_NOT_IMPL in x0.
+ * Clobbers x7.
+ *
+ * NOTE(review): the exception syndrome is not inspected, so any synchronous
+ * exception routed here is treated as a PSCI call.
+ */
+psci_call64:
+ ldr x7, =PSCI_CPU_OFF
+ cmp x0, x7
+ b.eq psci_cpu_off
+
+ ldr x7, =PSCI_CPU_ON
+ cmp x0, x7
+ b.eq psci_cpu_on
+
+ mov x0, PSCI_RET_NOT_IMPL
+ eret
+
+/*
+ * PSCI CPU_OFF: park the calling CPU.
+ *
+ * Looks up the caller's logical id from its own MPIDR, resets its
+ * branch_table slot to ADDR_INVALID and branches to spin, where the CPU
+ * waits until a new entry point is posted. Does not return to the caller,
+ * so x30 is not preserved across the bl.
+ *
+ * x1 - optional power state parameter, ignored here
+ */
+psci_cpu_off:
+ mrs x0, mpidr_el1
+ ldr x1, =MPIDR_ID_BITS
+ and x0, x0, x1
+ bl find_logical_id
+ /* NOTE(review): a -1 (unknown CPU) result is not checked before indexing. */
+ adr x1, branch_table
+ mov x2, #ADDR_INVALID
+ str x2, [x1, x0, lsl #3]
+
+ b spin
+
+/*
+ * PSCI CPU_ON: post an entry point for another CPU.
+ *
+ * x1 - target cpu
+ * x2 - address
+ *
+ * The target's branch_table slot is claimed with an exclusive load/store
+ * pair so that only one caller can replace ADDR_INVALID with an address.
+ * Returns PSCI_RET_SUCCESS in x0 once the address has been stored, or
+ * PSCI_RET_DENIED if the target is not in id_table or its slot already
+ * holds an address.
+ *
+ * The caller's x30 is saved in x15 around the bl and restored before eret.
+ * Clobbers x1-x5, x14, x15.
+ */
+psci_cpu_on:
+ mov x15, x30
+ mov x14, x2
+ mov x0, x1
+
+ bl find_logical_id
+ cmp x0, #-1
+ b.eq 1f
+
+ adr x3, branch_table
+ add x3, x3, x0, lsl #3
+
+ ldr x4, =ADDR_INVALID
+
+ /*
+ * A store-exclusive may fail even though the slot still holds
+ * ADDR_INVALID, so retry the whole sequence rather than reporting
+ * DENIED. If another CPU won the race, the reloaded value no longer
+ * matches ADDR_INVALID and we leave through the DENIED path.
+ */
+2: ldxr x5, [x3]
+ cmp x4, x5
+ b.ne 1f
+
+ stxr w5, x14, [x3] // x5 is dead here; reuse it for the status
+ cbnz w5, 2b
+
+ /* Make the new entry visible, then wake CPUs waiting in wfe. */
+ dsb ishst
+ sev
+
+ mov x0, #PSCI_RET_SUCCESS
+ mov x30, x15
+ eret
+
+1: mov x0, #PSCI_RET_DENIED
+ mov x30, x15
+ eret
+
+
+/*
+ * Takes masked MPIDR in x0, returns logical id in x0
+ * Returns -1 for unknown MPIDRs
+ * Clobbers x1, x2, x3 and the condition flags
+ *
+ * Only entries 0 .. nr_cpus - 1 of id_table are searched. The MPIDR_INVALID
+ * sentinel stored at index nr_cpus is never compared, so a query for -1 is
+ * reported as unknown and cannot yield an index past the end of branch_table.
+ */
+find_logical_id:
+__find_logical_index:
+ adr x2, id_table
+ mov x1, xzr
+1: mov x3, #nr_cpus // check we haven't walked off the end of the array
+ cmp x1, x3
+ b.hs 3f // index >= nr_cpus: not found
+ ldr x3, [x2, x1, lsl #3]
+ cmp x3, x0
+ b.eq 2f
+ add x1, x1, #1
+ b 1b
+2: mov x0, x1
+ ret
+3: mov x0, #-1
+ ret
+
+/*
+ * Install the exception vector table at `vector` as the EL3 vector base.
+ * Clobbers x0.
+ */
+setup_vector:
+ adr x0, vector
+ msr VBAR_EL3, x0
+ isb
+ ret
+
+/*
+ * Entry point when EL3 is available. Run by every CPU.
+ *
+ * Installs the EL3 vectors and enables the identity map, then sends every
+ * CPU to spin. The primary CPU (logical id 0) first posts start_cpu0 as its
+ * own entry point in branch_table, so it leaves the pen straight away.
+ */
+start_el3:
+ bl setup_vector
+ bl switch_to_idmap
+
+ /* only boot the primary cpu (entry 0 in the table) */
+ mrs x0, mpidr_el1
+ ldr x1, =MPIDR_ID_BITS
+ and x0, x0, x1
+ bl find_logical_id
+ cbnz x0, spin
+
+ adr x2, branch_table
+ adr x1, start_cpu0
+ str x1, [x2]
+ /* Set the local event register so the first wfe in spin falls through. */
+ sevl
+ b spin
+
+/*
+ * Poll the release table, waiting for a valid address to appear.
+ * When a valid address appears, branch to it.
+ *
+ * The calling CPU works out its own logical id from MPIDR_EL1; CPUs that are
+ * not listed in id_table are sent to spin_dead. Does not return.
+ */
+spin:
+ mrs x0, mpidr_el1
+ ldr x1, =MPIDR_ID_BITS
+ and x0, x0, x1
+ bl find_logical_id
+ cmp x0, #-1
+ b.eq spin_dead
+
+ adr x1, branch_table
+ mov x3, #ADDR_INVALID
+
+ /* x1 = address of this CPU's branch_table slot */
+ add x1, x1, x0, lsl #3
+
+ /* Sleep until an event, then re-check the slot. */
+1: wfe
+ ldr x2, [x1]
+ cmp x2, x3
+ b.eq 1b
+
+ /*
+ * x0 carries the posted entry point to el2_trampoline.
+ * NOTE(review): drop_el and SPSR_KERNEL are not defined in this file
+ * (presumably in common.S); drop_el appears to leave EL3 using the SPSR
+ * value in x3 and resume at the address in x4 - confirm.
+ */
+ mov x3, #SPSR_KERNEL
+ adr x4, el2_trampoline
+ mov x0, x2
+ drop_el x3, x4
+
+/*
+ * This PSCI implementation requires EL3. Without EL3 we'll only boot the
+ * primary cpu, all others will be trapped in an infinite loop.
+ *
+ * The CPU with logical id 0 branches to start_cpu0; every other CPU,
+ * including one not listed in id_table (-1), falls through to spin_dead.
+ */
+start_no_el3:
+ mrs x0, mpidr_el1
+ ldr x1, =MPIDR_ID_BITS
+ and x0, x0, x1
+ bl find_logical_id
+ cbz x0, start_cpu0
+/* Terminal loop for CPUs that can never be started: wfe forever. */
+spin_dead:
+ wfe
+ b spin_dead
+
+
+/*
+ * Clean and invalidate the caches at EL2 to simplify EL3's cache usage.
+ *
+ * x0 - address to branch to once the caches have been flushed
+ *
+ * The target is parked in x15 across the call.
+ * NOTE(review): relies on flush_caches (defined elsewhere) leaving x15
+ * untouched - confirm.
+ */
+el2_trampoline:
+ mov x15, x0
+ bl flush_caches
+ br x15
+
+/*
+ * Boot path of the primary CPU: run ns_init_system, then enter the kernel
+ * with x0 = dtb and x1-x3 = 0. Does not return.
+ */
+start_cpu0:
+ bl ns_init_system
+
+ /*
+ * Kernel parameters
+ *
+ * Loaded after the call above: x0-x3 are scratch registers that
+ * ns_init_system is free to overwrite, so setting them any earlier would
+ * rely on that routine happening to leave them alone.
+ */
+ ldr x0, =dtb
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+
+ b kernel