aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-11-08 12:39:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-11-08 12:39:54 -0800
commit1995a536702921f000acda2bed645f1fe0e7ee5b (patch)
tree15a97ab5a6f4081c8708d39d158c703c86e92109
parent90450a06162e6c71ab813ea22a83196fe7cff4bc (diff)
parent02e790ee3077c0571794d0ab8f71413edbe129cc (diff)
downloadlinux-1995a536702921f000acda2bed645f1fe0e7ee5b.tar.gz
Merge tag 's390-6.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull more s390 updates from Vasily Gorbik: - Get rid of s390 specific use of two PTEs per 4KB page with complex half-used pages tracking. Using full 4KB pages for 2KB PTEs increases the memory footprint of page tables but drastically simplify mm code, removing a common blocker for common code changes and adaptations - Simplify and rework "cmma no-dat" handling. This is a follow up for recent fixes which prevent potential incorrect guest TLB flushes - Add perf user stack unwinding as well as USER_STACKTRACE support for user space built with -mbackchain compile option - Add few missing conversion from tlb_remove_table to tlb_remove_ptdesc - Fix crypto cards vanishing in a secure execution environment due to asynchronous errors - Avoid reporting crypto cards or queues in check-stop state as online - Fix null-ptr deference in AP bus code triggered by early config change via SCLP - Couple of stability improvements in AP queue interrupt handling * tag 's390-6.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: s390/mm: make pte_free_tlb() similar to pXd_free_tlb() s390/mm: use compound page order to distinguish page tables s390/mm: use full 4KB page for 2KB PTE s390/cmma: rework no-dat handling s390/cmma: move arch_set_page_dat() to header file s390/cmma: move set_page_stable() and friends to header file s390/cmma: move parsing of cmma kernel parameter to early boot code s390/cmma: cleanup inline assemblies s390/ap: fix vanishing crypto cards in SE environment s390/zcrypt: don't report online if card or queue is in check-stop state s390: add USER_STACKTRACE support s390/perf: implement perf_callchain_user() s390/ap: fix AP bus crash on early config change callback invocation s390/ap: re-enable interrupt for AP queues s390/ap: rework to use irq info from ap queue status s390/mm: add missing conversion to use ptdescs
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/ipl_parm.c8
-rw-r--r--arch/s390/boot/startup.c44
-rw-r--r--arch/s390/boot/vmem.c17
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/page-states.h59
-rw-r--r--arch/s390/include/asm/page.h1
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/setup.h3
-rw-r--r--arch/s390/include/asm/stacktrace.h7
-rw-r--r--arch/s390/include/asm/tlb.h13
-rw-r--r--arch/s390/kernel/early.c1
-rw-r--r--arch/s390/kernel/perf_event.c41
-rw-r--r--arch/s390/kernel/stacktrace.c43
-rw-r--r--arch/s390/mm/gmap.c4
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/page-states.c213
-rw-r--r--arch/s390/mm/pgalloc.c296
-rw-r--r--arch/s390/mm/vmem.c4
-rw-r--r--drivers/s390/crypto/ap_bus.c47
-rw-r--r--drivers/s390/crypto/ap_bus.h1
-rw-r--r--drivers/s390/crypto/ap_queue.c36
-rw-r--r--drivers/s390/crypto/zcrypt_card.c4
-rw-r--r--drivers/s390/crypto/zcrypt_queue.c5
25 files changed, 319 insertions, 537 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b0d67ac8695f9f..3bec98d20283b4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -236,6 +236,7 @@ config S390
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select TTY
+ select USER_STACKTRACE_SUPPORT
select VIRT_CPU_ACCOUNTING
select ZONE_DMA
# Note: keep the above list sorted alphabetically
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 7b7521762633f9..2ab4872fbee1c9 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
int __bootdata_preserved(__kaslr_enabled);
+int __bootdata_preserved(cmma_flag) = 1;
unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
unsigned long memory_limit;
@@ -295,6 +297,12 @@ void parse_boot_command_line(void)
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
+ if (!strcmp(param, "cmma")) {
+ rc = kstrtobool(val, &enabled);
+ if (!rc && !enabled)
+ cmma_flag = 0;
+ }
+
#if IS_ENABLED(CONFIG_KVM)
if (!strcmp(param, "prot_virt")) {
rc = kstrtobool(val, &enabled);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 8826c4f1864595..8104e0e3d188d8 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
+#include <asm/page-states.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/maccess.h>
@@ -57,6 +58,48 @@ static void detect_facilities(void)
machine.has_nx = 1;
}
+static int cmma_test_essa(void)
+{
+ unsigned long reg1, reg2, tmp = 0;
+ int rc = 1;
+ psw_t old;
+
+ /* Test ESSA_GET_STATE */
+ asm volatile(
+ " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
+ " epsw %[reg1],%[reg2]\n"
+ " st %[reg1],0(%[psw_pgm])\n"
+ " st %[reg2],4(%[psw_pgm])\n"
+ " larl %[reg1],1f\n"
+ " stg %[reg1],8(%[psw_pgm])\n"
+ " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+ " la %[rc],0\n"
+ "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ : [reg1] "=&d" (reg1),
+ [reg2] "=&a" (reg2),
+ [rc] "+&d" (rc),
+ [tmp] "=&d" (tmp),
+ "+Q" (S390_lowcore.program_new_psw),
+ "=Q" (old)
+ : [psw_old] "a" (&old),
+ [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [cmd] "i" (ESSA_GET_STATE)
+ : "cc", "memory");
+ return rc;
+}
+
+static void cmma_init(void)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_test_essa()) {
+ cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -306,6 +349,7 @@ void startup_kernel(void)
setup_boot_command_line();
parse_boot_command_line();
detect_facilities();
+ cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 3075d65e112c1c..e3a4500a5a7571 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -2,6 +2,7 @@
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
@@ -70,6 +71,10 @@ static void kasan_populate_shadow(void)
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+ __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pte, 1);
/*
* Current memory layout:
@@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)
table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
crst_table_init(table, val);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
if (!pte_leftover) {
pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
+ __arch_set_page_dat(pte, 1);
} else {
pte = pte_leftover;
pte_leftover = NULL;
@@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
unsigned long asce_bits;
int i;
+ /*
+ * Mark whole memory as no-dat. This must be done before any
+ * page tables are allocated, or kernel image builtin pages
+ * are marked as dat tables.
+ */
+ for_each_physmem_online_range(i, &start, &end)
+ __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit)
crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+ __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 829d68e2c68582..bb1b4bef1878b2 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,6 @@ typedef struct {
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
- struct list_head pgtable_list;
struct list_head gmap_list;
unsigned long gmap_asce;
unsigned long asce;
@@ -39,7 +38,6 @@ typedef struct {
#define INIT_MM_CONTEXT(name) \
.context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
- .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 757fe6f0d802c0..929af18b09081a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk,
unsigned long asce_type, init_entry;
spin_lock_init(&mm->context.lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index c33c4deb545ffd..08fcbd6281208e 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,6 +7,9 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
+#include <asm/sections.h>
+#include <asm/page.h>
+
#define ESSA_GET_STATE 0
#define ESSA_SET_STABLE 1
#define ESSA_SET_UNUSED 2
@@ -18,4 +21,60 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
+extern int __bootdata_preserved(cmma_flag);
+
+static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
+{
+ unsigned long rc;
+
+ asm volatile(
+ " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0"
+ : [rc] "=d" (rc)
+ : [paddr] "d" (paddr),
+ [cmd] "i" (cmd));
+ return rc;
+}
+
+static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd)
+{
+ unsigned long paddr = __pa(addr) & PAGE_MASK;
+
+ while (num_pages--) {
+ essa(paddr, cmd);
+ paddr += PAGE_SIZE;
+ }
+}
+
+static inline void __set_page_unused(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_UNUSED);
+}
+
+static inline void __set_page_stable_dat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE);
+}
+
+static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT);
+}
+
+static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_flag < 2)
+ __set_page_stable_dat(addr, num_pages);
+ else
+ __set_page_stable_nodat(addr, num_pages);
+}
+
+static inline void __arch_set_page_dat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ __set_page_stable_dat(addr, num_pages);
+}
+
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index cfec0743314eaa..73b9c3bf377f88 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr)
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
-void arch_set_page_dat(struct page *page, int order);
static inline int devmem_is_allowed(unsigned long pfn)
{
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 376b4b23bdaa34..502d655fe6ae66 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
struct page *page_table_alloc_pgste(struct mm_struct *mm);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_free_pgste(struct page *page);
extern int page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 25cadc2b9cff27..df316436d2e140 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { }
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
-void cmma_init(void);
-void cmma_init_nodat(void);
-
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 78f7b729b65f10..31ec4f545e036a 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -6,6 +6,13 @@
#include <linux/ptrace.h>
#include <asm/switch_to.h>
+struct stack_frame_user {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned long empty2[4];
+};
+
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 383b1f91442c99..d1455a601adcad 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
- /*
- * page_table_free_rcu takes care of the allocation bit masks
- * of the 2K table fragments in the 4K page table page,
- * then calls tlb_remove_table.
- */
- page_table_free_rcu(tlb, (unsigned long *) pte, address);
+ if (mm_alloc_pgste(tlb->mm))
+ gmap_unlink(tlb->mm, (unsigned long *)pte, address);
+ tlb_remove_ptdesc(tlb, pte);
}
/*
@@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
- tlb_remove_table(tlb, p4d);
+ tlb_remove_ptdesc(tlb, p4d);
}
/*
@@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_p4ds = 1;
- tlb_remove_table(tlb, pud);
+ tlb_remove_ptdesc(tlb, pud);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ff1f02b54771cb..eb43e5922a25d7 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc);
decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index c27321cb0969fb..dfa77da2fd2ec5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <linux/sysfs.h>
+#include <asm/stacktrace.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
@@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
}
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ perf_callchain_store(entry, instruction_pointer(regs));
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (entry->nr < entry->max_stack) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ perf_callchain_store(entry, ip);
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
+
/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 0787010139f776..94f440e3830319 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -6,9 +6,12 @@
*/
#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
+#include <asm/ptrace.h>
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
@@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return -EINVAL;
return 0;
}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ if (!consume_entry(cookie, instruction_pointer(regs)))
+ return;
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (1) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ if (!consume_entry(cookie, ip))
+ break;
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 20786f6883b299..6f96b5a71c6383 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -18,7 +18,7 @@
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
-
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
@@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void)
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return NULL;
- arch_set_page_dat(page, CRST_ALLOC_ORDER);
+ __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
return page;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 7eca10c32caa5b..43e612bc2bcd34 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -164,14 +164,10 @@ void __init mem_init(void)
pv_init();
kfence_split_mapping();
- /* Setup guest page hinting */
- cmma_init();
/* this will put all low memory onto the freelists */
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
-
- cmma_init_nodat();
}
void free_initmem(void)
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index a31acb2c4ef24c..01f9b39e65f5b0 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -7,212 +7,18 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
#include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <asm/asm-extable.h>
-#include <asm/facility.h>
#include <asm/page-states.h>
+#include <asm/sections.h>
+#include <asm/page.h>
-static int cmma_flag = 1;
-
-static int __init cmma(char *str)
-{
- bool enabled;
-
- if (!kstrtobool(str, &enabled))
- cmma_flag = enabled;
- return 1;
-}
-__setup("cmma=", cmma);
-
-static inline int cmma_test_essa(void)
-{
- unsigned long tmp = 0;
- int rc = -EOPNOTSUPP;
-
- /* test ESSA_GET_STATE */
- asm volatile(
- " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- "0: la %[rc],0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : [rc] "+&d" (rc), [tmp] "+&d" (tmp)
- : [cmd] "i" (ESSA_GET_STATE));
- return rc;
-}
-
-void __init cmma_init(void)
-{
- if (!cmma_flag)
- return;
- if (cmma_test_essa()) {
- cmma_flag = 0;
- return;
- }
- if (test_facility(147))
- cmma_flag = 2;
-}
-
-static inline void set_page_unused(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_UNUSED));
-}
-
-static inline void set_page_stable_dat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
-}
-
-static inline void set_page_stable_nodat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE_NODAT));
-}
-
-static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || pmd_large(*pmd))
- continue;
- page = phys_to_page(pmd_val(*pmd));
- set_bit(PG_arch_1, &page->flags);
- } while (pmd++, addr = next, addr != end);
-}
-
-static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pud_t *pud;
- int i;
-
- pud = pud_offset(p4d, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none(*pud) || pud_large(*pud))
- continue;
- if (!pud_folded(*pud)) {
- page = phys_to_page(pud_val(*pud));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pmd(pud, addr, next);
- } while (pud++, addr = next, addr != end);
-}
-
-static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- p4d_t *p4d;
- int i;
-
- p4d = p4d_offset(pgd, addr);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none(*p4d))
- continue;
- if (!p4d_folded(*p4d)) {
- page = phys_to_page(p4d_val(*p4d));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pud(p4d, addr, next);
- } while (p4d++, addr = next, addr != end);
-}
-
-static void mark_kernel_pgd(void)
-{
- unsigned long addr, next, max_addr;
- struct page *page;
- pgd_t *pgd;
- int i;
-
- addr = 0;
- /*
- * Figure out maximum virtual address accessible with the
- * kernel ASCE. This is required to keep the page table walker
- * from accessing non-existent entries.
- */
- max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
- max_addr = 1UL << (max_addr * 11 + 31);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, max_addr);
- if (pgd_none(*pgd))
- continue;
- if (!pgd_folded(*pgd)) {
- page = phys_to_page(pgd_val(*pgd));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_p4d(pgd, addr, next);
- } while (pgd++, addr = next, addr != max_addr);
-}
-
-void __init cmma_init_nodat(void)
-{
- struct page *page;
- unsigned long start, end, ix;
- int i;
-
- if (cmma_flag < 2)
- return;
- /* Mark pages used in kernel page tables */
- mark_kernel_pgd();
- page = virt_to_page(&swapper_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- page = virt_to_page(&invalid_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
-
- /* Set all kernel pages not used for page tables to stable/no-dat */
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
- page = pfn_to_page(start);
- for (ix = start; ix < end; ix++, page++) {
- if (__test_and_clear_bit(PG_arch_1, &page->flags))
- continue; /* skip page table pages */
- if (!list_empty(&page->lru))
- continue; /* skip free pages */
- set_page_stable_nodat(page, 0);
- }
- }
-}
+int __bootdata_preserved(cmma_flag);
void arch_free_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_unused(page, order);
+ __set_page_unused(page_to_virt(page), 1UL << order);
}
void arch_alloc_page(struct page *page, int order)
@@ -220,14 +26,7 @@ void arch_alloc_page(struct page *page, int order)
if (!cmma_flag)
return;
if (cmma_flag < 2)
- set_page_stable_dat(page, order);
+ __set_page_stable_dat(page_to_virt(page), 1UL << order);
else
- set_page_stable_nodat(page, order);
-}
-
-void arch_set_page_dat(struct page *page, int order)
-{
- if (!cmma_flag)
- return;
- set_page_stable_dat(page, order);
+ __set_page_stable_nodat(page_to_virt(page), 1UL << order);
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 5488ae17318ee3..008e487c94a631 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
@@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl);
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+ unsigned long *table;
if (!ptdesc)
return NULL;
- arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
- return (unsigned long *) ptdesc_to_virt(ptdesc);
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+ return table;
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -130,11 +133,6 @@ err_p4d:
return -ENOMEM;
}
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- return atomic_fetch_xor(bits, v) ^ bits;
-}
-
#ifdef CONFIG_PGSTE
struct page *page_table_alloc_pgste(struct mm_struct *mm)
@@ -145,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (ptdesc) {
table = (u64 *)ptdesc_to_virt(ptdesc);
- arch_set_page_dat(virt_to_page(table), 0);
+ __arch_set_page_dat(table, 1);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
@@ -159,125 +157,11 @@ void page_table_free_pgste(struct page *page)
#endif /* CONFIG_PGSTE */
-/*
- * A 2KB-pgtable is either upper or lower half of a normal page.
- * The second half of the page may be unused or used as another
- * 2KB-pgtable.
- *
- * Whenever possible the parent page for a new 2KB-pgtable is picked
- * from the list of partially allocated pages mm_context_t::pgtable_list.
- * In case the list is empty a new parent page is allocated and added to
- * the list.
- *
- * When a parent page gets fully allocated it contains 2KB-pgtables in both
- * upper and lower halves and is removed from mm_context_t::pgtable_list.
- *
- * When 2KB-pgtable is freed from to fully allocated parent page that
- * page turns partially allocated and added to mm_context_t::pgtable_list.
- *
- * If 2KB-pgtable is freed from the partially allocated parent page that
- * page turns unused and gets removed from mm_context_t::pgtable_list.
- * Furthermore, the unused parent page is released.
- *
- * As follows from the above, no unallocated or fully allocated parent
- * pages are contained in mm_context_t::pgtable_list.
- *
- * The upper byte (bits 24-31) of the parent page _refcount is used
- * for tracking contained 2KB-pgtables and has the following format:
- *
- * PP AA
- * 01234567 upper byte (bits 24-31) of struct page::_refcount
- * || ||
- * || |+--- upper 2KB-pgtable is allocated
- * || +---- lower 2KB-pgtable is allocated
- * |+------- upper 2KB-pgtable is pending for removal
- * +-------- lower 2KB-pgtable is pending for removal
- *
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
- * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still unallocated;
- * - removed from mm_context_t::pgtable_list in case both hales of the
- * parent page are allocated;
- * These operations are protected with mm_context_t::lock.
- *
- * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
- * and the corresponding PP bit is set to 1 in a single atomic operation.
- * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
- * exclusive and may never be both set to 1!
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still allocated;
- * - removed from mm_context_t::pgtable_list in case the second half of
- * the parent page is unallocated;
- * These operations are protected with mm_context_t::lock.
- *
- * It is important to understand that mm_context_t::lock only protects
- * mm_context_t::pgtable_list and AA bits, but not the parent page itself
- * and PP bits.
- *
- * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
- * while both AA bits and the second PP bit are already unset. Then the
- * parent page does not contain any 2KB-pgtable fragment anymore, and it has
- * also been removed from mm_context_t::pgtable_list. It is safe to release
- * the page therefore.
- *
- * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
- * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
- * while the PP bits are never used, nor such a page is added to or removed
- * from mm_context_t::pgtable_list.
- *
- * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
- * and prevents both 2K fragments from being reused. pte_free_defer() has to
- * guarantee that its pgtable cannot be reused before the RCU grace period
- * has elapsed (which page_table_free_rcu() does not actually guarantee).
- * But for simplicity, because page->rcu_head overlays page->lru, and because
- * the RCU callback might not be called before the mm_context_t has been freed,
- * pte_free_defer() in this implementation prevents both fragments from being
- * reused, and delays making the call to RCU until both fragments are freed.
- */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *table;
struct ptdesc *ptdesc;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- ptdesc = list_first_entry(&mm->context.pgtable_list,
- struct ptdesc, pt_list);
- mask = atomic_read(&ptdesc->_refcount) >> 24;
- /*
- * The pending removal bits must also be checked.
- * Failure to do so might lead to an impossible
- * value of (i.e 0x13 or 0x23) written to _refcount.
- * Such values violate the assumption that pending and
- * allocation bits are mutually exclusive, and the rest
- * of the code unrails as result. That could lead to
- * a whole bunch of races and corruptions.
- */
- mask = (mask | (mask >> 4)) & 0x03U;
- if (mask != 0x03U) {
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&ptdesc->_refcount,
- 0x01U << (bit + 24));
- list_del_init(&ptdesc->pt_list);
- }
- }
- spin_unlock_bh(&mm->context.lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
+ unsigned long *table;
+
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;
@@ -285,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
pagetable_free(ptdesc);
return NULL;
}
- arch_set_page_dat(ptdesc_page(ptdesc), 0);
- /* Initialize page table */
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- INIT_LIST_HEAD(&ptdesc->pt_list);
- atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
- memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
- memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
- spin_lock_bh(&mm->context.lock);
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.lock);
- }
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1);
+ /* pt_list is used by gmap only */
+ INIT_LIST_HEAD(&ptdesc->pt_list);
+ memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+ memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
}
-static void page_table_release_check(struct page *page, void *table,
- unsigned int half, unsigned int mask)
+static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
{
- char msg[128];
-
- if (!IS_ENABLED(CONFIG_DEBUG_VM))
- return;
- if (!mask && list_empty(&page->lru))
- return;
- snprintf(msg, sizeof(msg),
- "Invalid pgtable %p release half 0x%02x mask 0x%02x",
- table, half, mask);
- dump_page(page, msg);
-}
-
-static void pte_free_now(struct rcu_head *head)
-{
- struct ptdesc *ptdesc;
-
- ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
pagetable_pte_dtor(ptdesc);
pagetable_free(ptdesc);
}
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned int mask, bit, half;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release
- * will happen outside of the critical section from this
- * function or from __tlb_remove_table()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to head of list, to make
- * this freed half available for immediate reuse.
- */
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- half = 0x01U << bit;
- } else {
- half = 0x03U;
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- }
-
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
+void __tlb_remove_table(void *table)
{
- struct mm_struct *mm;
- unsigned int bit, mask;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
+ struct page *page = ptdesc_page(ptdesc);
- mm = tlb->mm;
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) ((unsigned long)table | 0x03U);
- tlb_remove_ptdesc(tlb, table);
+ if (compound_order(page) == CRST_ALLOC_ORDER) {
+ /* pmd, pud, or p4d */
+ pagetable_free(ptdesc);
return;
}
- bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release will happen
- * outside of the critical section from __tlb_remove_table() or from
- * page_table_free()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to end of list, to make
- * this freed half available for reuse once its pending
- * bit has been cleared by __tlb_remove_table().
- */
- list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
- tlb_remove_table(tlb, table);
+ pagetable_pte_dtor_free(ptdesc);
}
-void __tlb_remove_table(void *_table)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
{
- unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct ptdesc *ptdesc = virt_to_ptdesc(table);
+ struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- switch (half) {
- case 0x00U: /* pmd, pud, or p4d */
- pagetable_free(ptdesc);
- return;
- case 0x01U: /* lower 2K of a 4K page table */
- case 0x02U: /* higher 2K of a 4K page table */
- mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- break;
- case 0x03U: /* 4K page table with pgstes */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- break;
- }
-
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
- struct page *page;
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- page = virt_to_page(pgtable);
- SetPageActive(page);
- page_table_free(mm, (unsigned long *)pgtable);
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
/*
- * page_table_free() does not do the pgste gmap_unlink() which
- * page_table_free_rcu() does: warn us if pgste ever reaches here.
+ * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
+ * Turn to the generic pte_free_defer() version once gmap is removed.
*/
WARN_ON_ONCE(mm_has_pgste(mm));
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 2e8a1064f103ac..186a020857cf6a 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val)
if (!table)
return NULL;
crst_table_init(table, val);
- if (slab_is_available())
- arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ __arch_set_page_dat(pte, 1);
return pte;
}
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index d09e08b71cfba0..5dd33155d5d503 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain);
/*
* ap_queue_info(): Check and get AP queue info.
* Returns: 1 if APQN exists and info is filled,
- * 0 if APQN seems to exit but there is no info
+ * 0 if APQN seems to exist but there is no info
* available (eg. caused by an asynch pending error)
* -1 invalid APQN, TAPQ error or AP queue status which
* indicates there is no APQN.
@@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac,
/* call TAPQ on this APQN */
status = ap_test_queue(qid, ap_apft_available(), &tapq_info);
- /* handle pending async error with return 'no info available' */
- if (status.async)
- return 0;
-
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
case AP_RESPONSE_BUSY:
- /*
- * According to the architecture in all these cases the
- * info should be filled. All bits 0 is not possible as
- * there is at least one of the mode bits set.
- */
- if (WARN_ON_ONCE(!tapq_info.value))
- return 0;
- *q_type = tapq_info.at;
- *q_fac = tapq_info.fac;
- *q_depth = tapq_info.qd;
- *q_ml = tapq_info.ml;
- *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
- *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
- return 1;
+ /* For all these RCs the tapq info should be available */
+ break;
default:
- /*
- * A response code which indicates, there is no info available.
- */
- return -1;
+ /* On a pending async error the info should be available */
+ if (!status.async)
+ return -1;
+ break;
}
+
+ /* There should be at least one of the mode bits set */
+ if (WARN_ON_ONCE(!tapq_info.value))
+ return 0;
+
+ *q_type = tapq_info.at;
+ *q_fac = tapq_info.fac;
+ *q_depth = tapq_info.qd;
+ *q_ml = tapq_info.ml;
+ *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
+ *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
+
+ return 1;
}
void ap_wait(enum ap_sm_wait wait)
@@ -1022,6 +1019,10 @@ EXPORT_SYMBOL(ap_driver_unregister);
void ap_bus_force_rescan(void)
{
+ /* Only trigger AP bus scans after the initial scan is done */
+ if (atomic64_read(&ap_scan_bus_count) <= 0)
+ return;
+
/* processing a asynchronous bus rescan */
del_timer(&ap_config_timer);
queue_work(system_long_wq, &ap_scan_work);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 359a35f894d518..b0771ca0849b99 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -206,7 +206,6 @@ struct ap_queue {
bool config; /* configured state */
bool chkstop; /* checkstop state */
ap_qid_t qid; /* AP queue id. */
- bool interrupt; /* indicate if interrupts are enabled */
bool se_bound; /* SE bound state */
unsigned int assoc_idx; /* SE association index */
int queue_count; /* # messages currently on AP queue. */
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 993240370ecf7c..3934a0cc13e762 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
return AP_SM_WAIT_AGAIN;
}
aq->sm_state = AP_SM_STATE_IDLE;
- return AP_SM_WAIT_NONE;
+ break;
case AP_RESPONSE_NO_PENDING_REPLY:
if (aq->queue_count > 0)
- return aq->interrupt ?
+ return status.irq_enabled ?
AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
aq->sm_state = AP_SM_STATE_IDLE;
- return AP_SM_WAIT_NONE;
+ break;
default:
aq->dev_state = AP_DEV_STATE_ERROR;
aq->last_err_rc = status.response_code;
@@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
+ /* Check and maybe enable irq support (again) on this queue */
+ if (!status.irq_enabled && status.queue_empty) {
+ void *lsi_ptr = ap_airq_ptr();
+
+ if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) {
+ aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
+ return AP_SM_WAIT_AGAIN;
+ }
+ }
+ return AP_SM_WAIT_NONE;
}
/**
@@ -254,7 +264,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
fallthrough;
case AP_RESPONSE_Q_FULL:
aq->sm_state = AP_SM_STATE_QUEUE_FULL;
- return aq->interrupt ?
+ return status.irq_enabled ?
AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
@@ -307,7 +317,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
- aq->interrupt = false;
aq->rapq_fbit = 0;
aq->se_bound = false;
return AP_SM_WAIT_LOW_TIMEOUT;
@@ -383,7 +392,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
if (status.irq_enabled == 1) {
/* Irqs are now enabled */
- aq->interrupt = true;
aq->sm_state = (aq->queue_count > 0) ?
AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
}
@@ -626,16 +634,21 @@ static ssize_t interrupt_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct ap_queue *aq = to_ap_queue(dev);
+ struct ap_queue_status status;
int rc = 0;
spin_lock_bh(&aq->lock);
- if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
+ if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) {
rc = sysfs_emit(buf, "Enable Interrupt pending.\n");
- else if (aq->interrupt)
- rc = sysfs_emit(buf, "Interrupts enabled.\n");
- else
- rc = sysfs_emit(buf, "Interrupts disabled.\n");
+ } else {
+ status = ap_tapq(aq->qid, NULL);
+ if (status.irq_enabled)
+ rc = sysfs_emit(buf, "Interrupts enabled.\n");
+ else
+ rc = sysfs_emit(buf, "Interrupts disabled.\n");
+ }
spin_unlock_bh(&aq->lock);
+
return rc;
}
@@ -1032,7 +1045,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
if (ap_sb_available() && is_prot_virt_guest())
aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups;
aq->qid = qid;
- aq->interrupt = false;
spin_lock_init(&aq->lock);
INIT_LIST_HEAD(&aq->pendingq);
INIT_LIST_HEAD(&aq->requestq);
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
index c815722d0ac87e..050462d9522244 100644
--- a/drivers/s390/crypto/zcrypt_card.c
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev,
{
struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ap_card *ac = to_ap_card(dev);
- int online = ac->config && zc->online ? 1 : 0;
+ int online = ac->config && !ac->chkstop && zc->online ? 1 : 0;
return sysfs_emit(buf, "%d\n", online);
}
@@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev,
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
- if (online && !ac->config)
+ if (online && (!ac->config || ac->chkstop))
return -ENODEV;
zc->online = online;
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
index 112a80e8e6c2c3..67d8e0ae0eec5e 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev,
{
struct zcrypt_queue *zq = dev_get_drvdata(dev);
struct ap_queue *aq = to_ap_queue(dev);
- int online = aq->config && zq->online ? 1 : 0;
+ int online = aq->config && !aq->chkstop && zq->online ? 1 : 0;
return sysfs_emit(buf, "%d\n", online);
}
@@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev,
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
- if (online && (!aq->config || !aq->card->config))
+ if (online && (!aq->config || !aq->card->config ||
+ aq->chkstop || aq->card->chkstop))
return -ENODEV;
if (online && !zc->online)
return -EINVAL;