author    Ard Biesheuvel <ardb@kernel.org>    2022-02-21 09:27:38 +0100
committer Ard Biesheuvel <ardb@kernel.org>    2022-02-21 17:48:21 +0100
commit    5ca7e5f74d85e6d2ddeabaa1b85b64ae50864676 (patch)
tree      504bf804836122424dec6dea19abdf481309b3cb
parent    41db0218db7acdc78b6a0e53dc3b811d261e9ca9 (diff)
download  linux-arm64-ro-page-tables-pkvm-v5.17.tar.gz

arm64: kvm: track intermediate page tables at EL2 (arm64-ro-page-tables-pkvm-v5.17)
Implement the logic to map intermediate page tables read-only at stage 2, and
to track their state to ensure that only a single table entry refers to each
of them.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
-rw-r--r--    arch/arm64/kvm/hyp/nvhe/pgtable_protect.c    225
1 file changed, 222 insertions(+), 3 deletions(-)
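For context, a sketch of how the host (EL1) side might issue this hypercall. The wrapper below, the __pkvm_xchg_ro_pte hypercall ID and the use of physical addresses for the pgd and the entry are assumptions inferred from the handler's register usage; they are not part of this patch.

static inline pteval_t pkvm_xchg_ro_pte(pgd_t *pgdp, unsigned long addr,
                                        pte_t *ptep, pteval_t pteval)
{
        /*
         * Hypothetical EL1-side caller, assuming the HVC is exposed via the
         * usual kvm_call_hyp_nvhe() convention and that EL2 expects the
         * physical addresses of the pgd and of the entry being modified.
         * Returns the previous value of the entry, as installed by EL2.
         */
        return kvm_call_hyp_nvhe(__pkvm_xchg_ro_pte, __pa(pgdp), addr,
                                 __pa(ptep), pteval);
}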
diff --git a/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c b/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
index 5437e4006f2761..ef07a5eafdd4de 100644
--- a/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
@@ -113,17 +113,235 @@ static void inject_ptp_host_exception(struct kvm_cpu_context *host_ctxt)
ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
}
+static pteval_t fixmap_get_pteval(phys_addr_t phys_ptep, int idx)
+{
+ const void *p = hyp_fixmap_map(phys_ptep + 8 * idx);
+ pteval_t ret = p ? *(pteval_t *)p : 0x0;
+ hyp_fixmap_unmap();
+ return ret;
+}
+
+/*
+ * Resolve @address against the page table hierarchy starting from @pgd, and
+ * decide whether @pteval appearing at @ptep amounts to a block or page mapping
+ * for @address. If in doubt, return false.
+ */
+static bool is_block_or_page_mapping(phys_addr_t pgd, u64 addr, pte_t *ptep,
+ u64 pteval, int *level)
+{
+ bool is_block = false;
+ phys_addr_t p4d;
+ phys_addr_t pud;
+ phys_addr_t pmd;
+ phys_addr_t pte;
+
+ *level = -1;
+
+ /* not enough information to decide - err on the side of caution */
+ if (!pgd || addr == ULONG_MAX)
+ return false;
+
+ /* check for block mapping - encodings are the same for levels < 3 */
+ if ((pteval & PMD_TYPE_MASK) == PMD_TYPE_SECT)
+ is_block = true;
+
+ /*
+ * pteval is a valid entry, and could describe either a table mapping
+ * or a page mapping, depending on which level it happens to appear at.
+ * Walk the page tables to figure this out.
+ */
+ if (((u64)ptep & PAGE_MASK) == pgd) {
+ *level = 4 - CONFIG_PGTABLE_LEVELS;
+ return is_block && *level > 0;
+ }
+
+ p4d = __pgd_to_phys(__pgd(fixmap_get_pteval(pgd, pgd_index(addr))));
+ if (__is_defined(__PAGETABLE_P4D_FOLDED)) {
+ pud = p4d;
+ } else {
+ if (((u64)ptep & PAGE_MASK) == p4d) {
+ *level = 0;
+ return false;
+ }
+ pud = __p4d_to_phys(__p4d(fixmap_get_pteval(p4d, p4d_index(addr))));
+ }
+ if (__is_defined(__PAGETABLE_PUD_FOLDED)) {
+ pmd = pud;
+ } else {
+ if (((u64)ptep & PAGE_MASK) == pud) {
+ *level = 1;
+ return is_block;
+ }
+ pmd = __pud_to_phys(__pud(fixmap_get_pteval(pud, pud_index(addr))));
+ }
+
+ if (__is_defined(__PAGETABLE_PMD_FOLDED)) {
+ pte = pmd;
+ } else {
+ if (((u64)ptep & PAGE_MASK) == pmd) {
+ *level = 2;
+ return is_block;
+ }
+ pte = __pmd_to_phys(__pmd(fixmap_get_pteval(pmd, pmd_index(addr))));
+ }
+
+ if (((u64)ptep & PAGE_MASK) == pte) {
+ *level = 3;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check whether creating @count valid entries at @level for the target pages
+ * described in @pteval[] is permitted by the policy.
+ */
+static bool pkvm_pgtable_policy_allows(phys_addr_t phys_pgdp, bool is_table,
+ int level, const u64 *pteval, int count)
+{
+ int i;
+
+ if (level == 3) {
+ /*
+ * Don't allow page mappings of pgtable pages, to avoid
+ * mistaking them for table mappings upon release.
+ */
+ for (i = 0; i < count; i++) {
+ u64 pa = __pte_to_phys(__pte(pteval[i]));
+
+ if ((pteval[i] & PTE_VALID) &&
+ addr_is_memory(pa) &&
+ !kvm_pgtable_ptp_is_untracked(pa))
+ return false;
+ }
+ }
+
+ //
+ //
+ // TODO invoke policy engine
+ //
+ //
+
+ return true;
+}
+
+/*
+ * Life cycle of an EL1 page table
+ * ===============================
+ *
+ * EL1 is in charge of allocating and freeing pages to be used for intermediate
+ * page tables, but we have to keep track of them at EL2 in order to maintain
+ * read-only mappings of those pages at stage 2, to force the EL1 OS to use the
+ * HYP API to make modifications to the layout of each virtual address space.
+ *
+ * While root page tables are assigned and released explicitly, intermediate
+ * page tables are tracked by interpreting the changes made by the EL1 OS using
+ * the routine below. If the call results in a table entry being created or
+ * removed, this fact must be reflected in the stage 2 tracking of the page.
+ *
+ * So the simple rules are:
+ * - if the update creates a table mapping, the target page is remapped
+ * read-only at stage 2, wiped (*) and marked as a table page, unless it
+ * is already in that state, in which case the update is rejected;
+ * - if the update removes a table mapping, the target page is marked as
+ * untracked, and remapped read-write again.
+ *
+ * There are two issues that make this slightly more complicated than desired:
+ * - The core mm layer in Linux does not provide a target address for every page
+ * table modification arriving through the API below, but only for ones that
+ * create block or page mappings.
+ * - we cannot easily distinguish between level 3 page mappings and higher level
+ * table mappings, given that they use the same descriptor encoding.
+ *
+ * A new valid mapping is assumed to be a table mapping unless the pgd+address
+ * arguments identify it positively as a block or page mapping. The target of a
+ * new table mapping must not be in pgroot or pgtable state, and will be wiped
+ * and moved into pgtable state before the new valid mapping is created.
+ *
+ * If the new descriptor value is 0x0 and the entry is covered by a pgroot or
+ * pgtable page, and refers to a page that is currently in pgtable state, the
+ * page is reverted to default state after the old valid mapping is removed.
+ *
+ * (*) migration of level 2 entries is permitted as well, but only if all valid
+ * level 3 mappings they cover comply with the policy.
+ */
void handle___pkvm_xchg_ro_pte(struct kvm_cpu_context *host_ctxt)
{
- //DECLARE_REG(pgd_t *, pgdp, host_ctxt, 1);
+ DECLARE_REG(u64, pgdp, host_ctxt, 1);
DECLARE_REG(u64, address, host_ctxt, 2);
DECLARE_REG(u64, ptep, host_ctxt, 3);
DECLARE_REG(u64, pteval, host_ctxt, 4);
+ bool is_tracked;
pte_t *ptaddr;
+ u64 oldval;
+
+ is_tracked = !kvm_pgtable_ptp_is_untracked(ptep);
+
+ if (is_tracked && (pteval & PTE_VALID)) {
+ bool is_table;
+ int level;
+
+ /* valid entries must be created in the context of a pgd[] */
+ if (!pgdp) {
+ // TODO check whether pgdp is pgroot??
+ inject_ptp_host_exception(host_ctxt);
+ return;
+ }
+
+ is_table = !is_block_or_page_mapping(pgdp, address, (pte_t *)ptep,
+ pteval, &level);
+
+ if (!pkvm_pgtable_policy_allows(pgdp, is_table, level, &pteval, 1)) {
+ inject_ptp_host_exception(host_ctxt);
+ return;
+ }
+
+ if (is_table) {
+ u64 pa = __pte_to_phys(__pte(pteval));
+
+ if (!kvm_pgtable_ptp_make_pgtable(pa)) {
+ inject_ptp_host_exception(host_ctxt);
+ return;
+ }
+
+ ptaddr = hyp_fixmap_map(pa);
+
+ if (level == 2) {
+ // We permit moving level 2 entries as long
+ // as all valid level 3 entries they carry pass
+ // the policy check
+ if (!pkvm_pgtable_policy_allows(pgdp, false, 3,
+ (pteval_t *)ptaddr,
+ PTRS_PER_PTE)) {
+ inject_ptp_host_exception(host_ctxt);
+ return;
+ }
+ } else {
+ // wipe the page before first use
+ memset(ptaddr, 0, PAGE_SIZE);
+ }
+ hyp_fixmap_unmap();
+ }
+ }
ptaddr = hyp_fixmap_map(ptep);
- cpu_reg(host_ctxt, 1) = xchg_relaxed(&pte_val(*ptaddr), pteval);
+ oldval = xchg_relaxed(&pte_val(*ptaddr), pteval);
hyp_fixmap_unmap();
+
+ /*
+ * If the old entry was a valid table or page entry, assume it is the
+ * former and stop tracking it as a page table.
+ * TODO deal with oldval/pteval being valid table mappings of the same page
+ */
+ if (is_tracked && (oldval & PTE_TYPE_MASK) == PTE_TYPE_PAGE) {
+ /*
+ * If we are removing a mapping from a pgtable/pgroot page and
+ * the entry targets a pgtable page, move it to default state.
+ */
+ kvm_pgtable_ptp_make_untracked(__pte_to_phys(__pte(oldval)));
+ }
+ cpu_reg(host_ctxt, 1) = oldval;
}
void handle___pkvm_cmpxchg_ro_pte(struct kvm_cpu_context *host_ctxt)
@@ -139,7 +357,8 @@ void handle___pkvm_cmpxchg_ro_pte(struct kvm_cpu_context *host_ctxt)
* by the page table walker. If we can enforce this at HYP level, there
* is no need to go through the policy check at all.
*/
- if ((oldval ^ newval) & ~(PTE_DIRTY|PTE_WRITE|PTE_AF|PTE_RDONLY)) {
+ if (((oldval ^ newval) & ~(PTE_DIRTY|PTE_WRITE|PTE_AF|PTE_RDONLY)) ||
+ kvm_pgtable_ptp_is_untracked(ptep)) {
inject_ptp_host_exception(host_ctxt);
return;
}
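As a reading aid for the lifecycle comment above, a toy model of the per-page state tracking that it describes. The enum, array and helper names below are purely illustrative and do not reflect the actual kvm_pgtable_ptp_* implementation elsewhere in this series.

/* Toy model of the page states described in the lifecycle comment. */
enum ptp_state { PTP_DEFAULT, PTP_PGTABLE, PTP_PGROOT };

#define PTP_NR_PAGES    (1UL << 20)                /* illustrative pool size */
static enum ptp_state ptp_state_of[PTP_NR_PAGES];  /* indexed by PFN */

/* Claiming a page as an intermediate table: it must currently be untracked. */
static bool model_make_pgtable(unsigned long pfn)
{
        if (ptp_state_of[pfn] != PTP_DEFAULT)
                return false;   /* already a pgtable or pgroot page: reject */
        /*
         * In the real series the page is also wiped (except when migrating a
         * populated level 2 table) and remapped read-only at stage 2.
         */
        ptp_state_of[pfn] = PTP_PGTABLE;
        return true;
}

/* Releasing a table entry: only pgtable pages revert to the default state. */
static void model_make_untracked(unsigned long pfn)
{
        if (ptp_state_of[pfn] == PTP_PGTABLE)
                ptp_state_of[pfn] = PTP_DEFAULT;    /* remapped read-write again */
}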