author    | Ard Biesheuvel <ardb@kernel.org> | 2022-02-21 09:27:38 +0100
committer | Ard Biesheuvel <ardb@kernel.org> | 2022-02-21 17:48:21 +0100
commit    | 5ca7e5f74d85e6d2ddeabaa1b85b64ae50864676 (patch)
tree      | 504bf804836122424dec6dea19abdf481309b3cb
parent    | 41db0218db7acdc78b6a0e53dc3b811d261e9ca9 (diff)
download  | linux-arm64-ro-page-tables-pkvm-v5.17.tar.gz
arm64: kvm: track intermediate page tables at EL2 (arm64-ro-page-tables-pkvm-v5.17)
Implement the logic to map intermediate page tables read-only at stage
2, and to track their state to ensure that only a single table entry
refers to each of them.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pgtable_protect.c | 225
1 file changed, 222 insertions, 3 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c b/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
index 5437e4006f2761..ef07a5eafdd4de 100644
--- a/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/pgtable_protect.c
@@ -113,17 +113,235 @@ static void inject_ptp_host_exception(struct kvm_cpu_context *host_ctxt)
 		       ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 }
 
+static pteval_t fixmap_get_pteval(phys_addr_t phys_ptep, int idx)
+{
+	const void *p = hyp_fixmap_map(phys_ptep + 8 * idx);
+	pteval_t ret = p ? *(pteval_t *)p : 0x0;
+
+	hyp_fixmap_unmap();
+	return ret;
+}
+
+/*
+ * Resolve @addr against the page table hierarchy starting from @pgd, and
+ * decide whether @pteval appearing at @ptep amounts to a block or page
+ * mapping for @addr. When in doubt, return false.
+ */
+static bool is_block_or_page_mapping(phys_addr_t pgd, u64 addr, pte_t *ptep,
+				     u64 pteval, int *level)
+{
+	bool is_block = false;
+	phys_addr_t p4d;
+	phys_addr_t pud;
+	phys_addr_t pmd;
+	phys_addr_t pte;
+
+	*level = -1;
+
+	/* not enough information to decide - err on the side of caution */
+	if (!pgd || addr == ULONG_MAX)
+		return false;
+
+	/* check for block mapping - encodings are the same for levels < 3 */
+	if ((pteval & PMD_TYPE_MASK) == PMD_TYPE_SECT)
+		is_block = true;
+
+	/*
+	 * pteval is a valid entry, and could describe either a table mapping
+	 * or a page mapping, depending on which level it happens to appear
+	 * at. Walk the page tables to figure this out.
+	 */
+	if (((u64)ptep & PAGE_MASK) == pgd) {
+		*level = 4 - CONFIG_PGTABLE_LEVELS;
+		return is_block && *level > 0;
+	}
+
+	p4d = __pgd_to_phys(__pgd(fixmap_get_pteval(pgd, pgd_index(addr))));
+	if (__is_defined(__PAGETABLE_P4D_FOLDED)) {
+		pud = p4d;
+	} else {
+		if (((u64)ptep & PAGE_MASK) == p4d) {
+			*level = 0;
+			return false;
+		}
+		pud = __p4d_to_phys(__p4d(fixmap_get_pteval(p4d, p4d_index(addr))));
+	}
+
+	if (__is_defined(__PAGETABLE_PUD_FOLDED)) {
+		pmd = pud;
+	} else {
+		if (((u64)ptep & PAGE_MASK) == pud) {
+			*level = 1;
+			return is_block;
+		}
+		pmd = __pud_to_phys(__pud(fixmap_get_pteval(pud, pud_index(addr))));
+	}
+
+	if (__is_defined(__PAGETABLE_PMD_FOLDED)) {
+		pte = pmd;
+	} else {
+		if (((u64)ptep & PAGE_MASK) == pmd) {
+			*level = 2;
+			return is_block;
+		}
+		pte = __pmd_to_phys(__pmd(fixmap_get_pteval(pmd, pmd_index(addr))));
+	}
+
+	if (((u64)ptep & PAGE_MASK) == pte) {
+		*level = 3;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check whether creating @count valid entries at @level for the target pages
+ * described in @pteval[] is permitted by the policy.
+ */
+static bool pkvm_pgtable_policy_allows(phys_addr_t phys_pgdp, bool is_table,
+				       int level, const u64 *pteval, int count)
+{
+	int i;
+
+	if (level == 3) {
+		/*
+		 * Don't allow page mappings of pgtable pages, to avoid
+		 * mistaking them for table mappings upon release.
+		 */
+		for (i = 0; i < count; i++) {
+			u64 pa = __pte_to_phys(__pte(pteval[i]));
+
+			if ((pteval[i] & PTE_VALID) &&
+			    addr_is_memory(pa) &&
+			    !kvm_pgtable_ptp_is_untracked(pa))
+				return false;
+		}
+	}
+
+	// TODO: invoke policy engine
+
+	return true;
+}
+
+/*
+ * Life cycle of an EL1 page table
+ * ===============================
+ *
+ * EL1 is in charge of allocating and freeing pages to be used for
+ * intermediate page tables, but we have to keep track of them at EL2 in
+ * order to maintain read-only mappings of those pages at stage 2, to force
+ * the EL1 OS to use the HYP API to make modifications to the layout of each
+ * virtual address space.
+ *
+ * While root page tables are assigned and released explicitly, intermediate
+ * page tables are tracked by interpreting the changes made by the EL1 OS
+ * using the routine below. If the call results in a table entry being
+ * created or removed, this fact must be reflected in the stage 2 tracking
+ * of the page.
+ *
+ * So the simple rules are:
+ * - if the update creates a table mapping, the target page is remapped
+ *   read-only at stage 2, wiped (*) and marked as a table page, unless it
+ *   is already in that state, in which case the update is rejected;
+ * - if the update removes a table mapping, the target page is marked as
+ *   untracked, and remapped read-write again.
+ *
+ * There are two issues that make this slightly more complicated than
+ * desired:
+ * - the core mm layer in Linux does not provide a target address for every
+ *   page table modification arriving through the API below, but only for
+ *   ones that create block or page mappings;
+ * - we cannot easily distinguish between level 3 page mappings and higher
+ *   level table mappings, given that they use the same descriptor bit.
+ *
+ * A new valid mapping is assumed to be a table mapping unless the
+ * pgd+address arguments identify it positively as a block or page mapping.
+ * The target of a new table mapping must not be in pgroot or pgtable state,
+ * and will be wiped and moved into pgtable state before the new valid
+ * mapping is created.
+ *
+ * If the new descriptor value is 0x0 and the entry is covered by a pgroot
+ * or pgtable page, and refers to a page that is currently in pgtable state,
+ * the page is reverted to default state after the old valid mapping is
+ * removed.
+ *
+ * (*) migration of level 2 entries is permitted as well, but only if all
+ *     valid level 3 mappings they cover comply with the policy.
+ */
 void handle___pkvm_xchg_ro_pte(struct kvm_cpu_context *host_ctxt)
 {
-	//DECLARE_REG(pgd_t *, pgdp, host_ctxt, 1);
+	DECLARE_REG(u64, pgdp, host_ctxt, 1);
 	DECLARE_REG(u64, address, host_ctxt, 2);
 	DECLARE_REG(u64, ptep, host_ctxt, 3);
 	DECLARE_REG(u64, pteval, host_ctxt, 4);
+	bool is_tracked;
 	pte_t *ptaddr;
+	u64 oldval;
+
+	is_tracked = !kvm_pgtable_ptp_is_untracked(ptep);
+
+	if (is_tracked && (pteval & PTE_VALID)) {
+		bool is_table;
+		int level;
+
+		/* valid entries must be created in the context of a pgd[] */
+		if (!pgdp) {
+			// TODO check whether pgdp is pgroot??
+			inject_ptp_host_exception(host_ctxt);
+			return;
+		}
+
+		is_table = !is_block_or_page_mapping(pgdp, address,
+						     (pte_t *)ptep, pteval,
+						     &level);
+
+		if (!pkvm_pgtable_policy_allows(pgdp, is_table, level,
						&pteval, 1)) {
+			inject_ptp_host_exception(host_ctxt);
+			return;
+		}
+
+		if (is_table) {
+			u64 pa = __pte_to_phys(__pte(pteval));
+
+			if (!kvm_pgtable_ptp_make_pgtable(pa)) {
+				inject_ptp_host_exception(host_ctxt);
+				return;
+			}
+
+			ptaddr = hyp_fixmap_map(pa);
+
+			if (level == 2) {
+				// We permit moving level 2 entries as long
+				// as all valid level 3 entries they carry
+				// pass the policy check
+				if (!pkvm_pgtable_policy_allows(pgdp, false, 3,
+								(pteval_t *)ptaddr,
+								PTRS_PER_PTE)) {
+					inject_ptp_host_exception(host_ctxt);
+					return;
+				}
+			} else {
+				// wipe the page before first use
+				memset(ptaddr, 0, PAGE_SIZE);
+			}
+			hyp_fixmap_unmap();
+		}
+	}
 
 	ptaddr = hyp_fixmap_map(ptep);
-	cpu_reg(host_ctxt, 1) = xchg_relaxed(&pte_val(*ptaddr), pteval);
+	oldval = xchg_relaxed(&pte_val(*ptaddr), pteval);
 	hyp_fixmap_unmap();
+
+	/*
+	 * If the old entry was a valid table or page entry, assume it is the
+	 * former and stop tracking it as a page table.
+	 * TODO deal with oldval/pteval being valid table mappings of the same page
+	 */
+	if (is_tracked && (oldval & PTE_TYPE_MASK) == PTE_TYPE_PAGE) {
+		/*
+		 * If we are removing a mapping from a pgtable/pgroot page and
+		 * the entry targets a pgtable page, move it to default state.
+		 */
+		kvm_pgtable_ptp_make_untracked(__pte_to_phys(__pte(oldval)));
+	}
+	cpu_reg(host_ctxt, 1) = oldval;
 }
 
 void handle___pkvm_cmpxchg_ro_pte(struct kvm_cpu_context *host_ctxt)
@@ -139,7 +357,8 @@ void handle___pkvm_cmpxchg_ro_pte(struct kvm_cpu_context *host_ctxt)
 	 * by the page table walker. If we can enforce this at HYP level, there
 	 * is no need to go through the policy check at all.
 	 */
-	if ((oldval ^ newval) & ~(PTE_DIRTY|PTE_WRITE|PTE_AF|PTE_RDONLY)) {
+	if (((oldval ^ newval) & ~(PTE_DIRTY|PTE_WRITE|PTE_AF|PTE_RDONLY)) ||
+	    kvm_pgtable_ptp_is_untracked(ptep)) {
 		inject_ptp_host_exception(host_ctxt);
 		return;
 	}
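The kvm_pgtable_ptp_* helpers used by the handlers above are introduced elsewhere in this series; what they implement amounts to a small per-page state machine. The sketch below restates the rules from the "Life cycle of an EL1 page table" comment using hypothetical state names and helper signatures (the real tracking is encoded in stage 2 annotations, not in an enum):

	/* Hypothetical restatement of the per-page tracking rules. */
	enum ptp_state {
		PTP_UNTRACKED,	/* default: ordinary memory, read-write at stage 2 */
		PTP_PGTABLE,	/* intermediate table page, read-only at stage 2 */
		PTP_PGROOT,	/* root page, assigned and released explicitly */
	};

	/* creating a table mapping: default -> pgtable (page is wiped, mapped RO) */
	static bool ptp_make_pgtable(enum ptp_state *s)
	{
		if (*s != PTP_UNTRACKED)
			return false;	/* already pgtable or pgroot: reject */
		*s = PTP_PGTABLE;
		return true;
	}

	/* removing a table mapping: pgtable -> default (page remapped RW) */
	static void ptp_make_untracked(enum ptp_state *s)
	{
		if (*s == PTP_PGTABLE)
			*s = PTP_UNTRACKED;
	}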
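For orientation, this is roughly what the EL1 side of the exchange could look like: once the intermediate page tables are mapped read-only at stage 2, a direct store to a PTE traps, so the host kernel must route the update through the HYP call handled by handle___pkvm_xchg_ro_pte(). The wrapper below is a minimal sketch, not part of this patch: it assumes the hypercall is exposed as __pkvm_xchg_ro_pte via the usual kvm_call_hyp_nvhe() plumbing, and the wrapper name and __pa() marshalling are illustrative.

	/*
	 * Hypothetical EL1-side wrapper (not part of this patch). The argument
	 * order mirrors the DECLARE_REG() slots in handle___pkvm_xchg_ro_pte();
	 * the handler resolves @pgdp and @ptep through the HYP fixmap, so
	 * physical addresses are passed here.
	 */
	static inline pteval_t pkvm_xchg_ro_pte(pgd_t *pgdp, unsigned long addr,
						pte_t *ptep, pteval_t pteval)
	{
		return kvm_call_hyp_nvhe(__pkvm_xchg_ro_pte, __pa(pgdp), addr,
					 __pa(ptep), pteval);
	}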
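The bit mask in the cmpxchg hunk encodes the rule that the host may only race with changes the hardware page table walker could have made itself (access flag and dirty state updates). Pulled out as a standalone predicate for illustration:

	/*
	 * A change is tolerated only if it is confined to the access/dirty
	 * management bits; anything else (output address, attributes,
	 * validity) must go through the policy-checked xchg path.
	 */
	static bool hw_walker_like_change(pteval_t oldval, pteval_t newval)
	{
		return !((oldval ^ newval) &
			 ~(PTE_DIRTY | PTE_WRITE | PTE_AF | PTE_RDONLY));
	}

	/*
	 * hw_walker_like_change(pte, pte | PTE_AF)        -> true  (access flag set)
	 * hw_walker_like_change(pte, pte & ~PTE_RDONLY)   -> true  (DBM dirtying)
	 * hw_walker_like_change(pte, pte ^ PTE_ADDR_MASK) -> false (new output address)
	 */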