aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 14:25:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 14:25:26 -0700
commitb96a3e9142fdf346b05b20e867b4f0dfca119e96 (patch)
treeb338a8f8930abc24888fc3871c6627f6ad46e23b /tools
parent651a00bc56403161351090a9d7ddbd7095975324 (diff)
parent52ae298e3e5c9be5bb95e1c6d9199e5210f2a156 (diff)
downloadlinux-aarch64-b96a3e9142fdf346b05b20e867b4f0dfca119e96.tar.gz
Merge tag 'mm-stable-2023-08-28-18-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Some swap cleanups from Ma Wupeng ("fix WARN_ON in add_to_avail_list") - Peter Xu has a series (mm/gup: Unify hugetlb, speed up thp") which reduces the special-case code for handling hugetlb pages in GUP. It also speeds up GUP handling of transparent hugepages. - Peng Zhang provides some maple tree speedups ("Optimize the fast path of mas_store()"). - Sergey Senozhatsky has improved te performance of zsmalloc during compaction (zsmalloc: small compaction improvements"). - Domenico Cerasuolo has developed additional selftest code for zswap ("selftests: cgroup: add zswap test program"). - xu xin has doe some work on KSM's handling of zero pages. These changes are mainly to enable the user to better understand the effectiveness of KSM's treatment of zero pages ("ksm: support tracking KSM-placed zero-pages"). - Jeff Xu has fixes the behaviour of memfd's MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED sysctl ("mm/memfd: fix sysctl MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED"). - David Howells has fixed an fscache optimization ("mm, netfs, fscache: Stop read optimisation when folio removed from pagecache"). - Axel Rasmussen has given userfaultfd the ability to simulate memory poisoning ("add UFFDIO_POISON to simulate memory poisoning with UFFD"). - Miaohe Lin has contributed some routine maintenance work on the memory-failure code ("mm: memory-failure: remove unneeded PageHuge() check"). - Peng Zhang has contributed some maintenance work on the maple tree code ("Improve the validation for maple tree and some cleanup"). - Hugh Dickins has optimized the collapsing of shmem or file pages into THPs ("mm: free retracted page table by RCU"). - Jiaqi Yan has a patch series which permits us to use the healthy subpages within a hardware poisoned huge page for general purposes ("Improve hugetlbfs read on HWPOISON hugepages"). - Kemeng Shi has done some maintenance work on the pagetable-check code ("Remove unused parameters in page_table_check"). - More folioification work from Matthew Wilcox ("More filesystem folio conversions for 6.6"), ("Followup folio conversions for zswap"). And from ZhangPeng ("Convert several functions in page_io.c to use a folio"). - page_ext cleanups from Kemeng Shi ("minor cleanups for page_ext"). - Baoquan He has converted some architectures to use the GENERIC_IOREMAP ioremap()/iounmap() code ("mm: ioremap: Convert architectures to take GENERIC_IOREMAP way"). - Anshuman Khandual has optimized arm64 tlb shootdown ("arm64: support batched/deferred tlb shootdown during page reclamation/migration"). - Better maple tree lockdep checking from Liam Howlett ("More strict maple tree lockdep"). Liam also developed some efficiency improvements ("Reduce preallocations for maple tree"). - Cleanup and optimization to the secondary IOMMU TLB invalidation, from Alistair Popple ("Invalidate secondary IOMMU TLB on permission upgrade"). - Ryan Roberts fixes some arm64 MM selftest issues ("selftests/mm fixes for arm64"). - Kemeng Shi provides some maintenance work on the compaction code ("Two minor cleanups for compaction"). - Some reduction in mmap_lock pressure from Matthew Wilcox ("Handle most file-backed faults under the VMA lock"). - Aneesh Kumar contributes code to use the vmemmap optimization for DAX on ppc64, under some circumstances ("Add support for DAX vmemmap optimization for ppc64"). - page-ext cleanups from Kemeng Shi ("add page_ext_data to get client data in page_ext"), ("minor cleanups to page_ext header"). - Some zswap cleanups from Johannes Weiner ("mm: zswap: three cleanups"). - kmsan cleanups from ZhangPeng ("minor cleanups for kmsan"). - VMA handling cleanups from Kefeng Wang ("mm: convert to vma_is_initial_heap/stack()"). - DAMON feature work from SeongJae Park ("mm/damon/sysfs-schemes: implement DAMOS tried total bytes file"), ("Extend DAMOS filters for address ranges and DAMON monitoring targets"). - Compaction work from Kemeng Shi ("Fixes and cleanups to compaction"). - Liam Howlett has improved the maple tree node replacement code ("maple_tree: Change replacement strategy"). - ZhangPeng has a general code cleanup - use the K() macro more widely ("cleanup with helper macro K()"). - Aneesh Kumar brings memmap-on-memory to ppc64 ("Add support for memmap on memory feature on ppc64"). - pagealloc cleanups from Kemeng Shi ("Two minor cleanups for pcp list in page_alloc"), ("Two minor cleanups for get pageblock migratetype"). - Vishal Moola introduces a memory descriptor for page table tracking, "struct ptdesc" ("Split ptdesc from struct page"). - memfd selftest maintenance work from Aleksa Sarai ("memfd: cleanups for vm.memfd_noexec"). - MM include file rationalization from Hugh Dickins ("arch: include asm/cacheflush.h in asm/hugetlb.h"). - THP debug output fixes from Hugh Dickins ("mm,thp: fix sloppy text output"). - kmemleak improvements from Xiaolei Wang ("mm/kmemleak: use object_cache instead of kmemleak_initialized"). - More folio-related cleanups from Matthew Wilcox ("Remove _folio_dtor and _folio_order"). - A VMA locking scalability improvement from Suren Baghdasaryan ("Per-VMA lock support for swap and userfaults"). - pagetable handling cleanups from Matthew Wilcox ("New page table range API"). - A batch of swap/thp cleanups from David Hildenbrand ("mm/swap: stop using page->private on tail pages for THP_SWAP + cleanups"). - Cleanups and speedups to the hugetlb fault handling from Matthew Wilcox ("Change calling convention for ->huge_fault"). - Matthew Wilcox has also done some maintenance work on the MM subsystem documentation ("Improve mm documentation"). * tag 'mm-stable-2023-08-28-18-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (489 commits) maple_tree: shrink struct maple_tree maple_tree: clean up mas_wr_append() secretmem: convert page_is_secretmem() to folio_is_secretmem() nios2: fix flush_dcache_page() for usage from irq context hugetlb: add documentation for vma_kernel_pagesize() mm: add orphaned kernel-doc to the rst files. mm: fix clean_record_shared_mapping_range kernel-doc mm: fix get_mctgt_type() kernel-doc mm: fix kernel-doc warning from tlb_flush_rmaps() mm: remove enum page_entry_size mm: allow ->huge_fault() to be called without the mmap_lock held mm: move PMD_ORDER to pgtable.h mm: remove checks for pte_index memcg: remove duplication detection for mem_cgroup_uncharge_swap mm/huge_memory: work on folio->swap instead of page->private when splitting folio mm/swap: inline folio_set_swap_entry() and folio_swap_entry() mm/swap: use dedicated entry for swap in folio mm/swap: stop using page->private on tail pages for THP_SWAP selftests/mm: fix WARNING comparing pointer to 0 selftests: cgroup: fix test_kmem_memcg_deletion kernel mem check ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/radix-tree/maple.c134
-rw-r--r--tools/testing/selftests/bpf/progs/get_branch_snapshot.c4
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c21
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c286
-rw-r--r--tools/testing/selftests/damon/sysfs.sh6
-rw-r--r--tools/testing/selftests/kselftest.h9
-rw-r--r--tools/testing/selftests/kselftest/runner.sh7
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c329
-rw-r--r--tools/testing/selftests/mm/.gitignore1
-rw-r--r--tools/testing/selftests/mm/Makefile81
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c322
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c200
-rw-r--r--tools/testing/selftests/mm/madv_populate.c26
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/migration.c12
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c1
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh80
-rw-r--r--tools/testing/selftests/mm/settings2
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c4
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c12
-rw-r--r--tools/testing/selftests/mm/uffd-common.c5
-rw-r--r--tools/testing/selftests/mm/uffd-common.h3
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c32
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c117
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c2
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c4
28 files changed, 1513 insertions, 192 deletions
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 75ea2081a317e5..e5da1cad70baf6 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -45,6 +45,13 @@ struct rcu_test_struct2 {
unsigned long last[RCU_RANGE_COUNT];
};
+struct rcu_test_struct3 {
+ struct maple_tree *mt;
+ unsigned long index;
+ unsigned long last;
+ bool stop;
+};
+
struct rcu_reader_struct {
unsigned int id;
int mod;
@@ -34954,6 +34961,70 @@ void run_check_rcu(struct maple_tree *mt, struct rcu_test_struct *vals)
MT_BUG_ON(mt, !vals->seen_entry2);
}
+static void *rcu_slot_store_reader(void *ptr)
+{
+ struct rcu_test_struct3 *test = ptr;
+ MA_STATE(mas, test->mt, test->index, test->index);
+
+ rcu_register_thread();
+
+ rcu_read_lock();
+ while (!test->stop) {
+ mas_walk(&mas);
+ /* The length of growth to both sides must be equal. */
+ RCU_MT_BUG_ON(test, (test->index - mas.index) !=
+ (mas.last - test->last));
+ }
+ rcu_read_unlock();
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static noinline void run_check_rcu_slot_store(struct maple_tree *mt)
+{
+ pthread_t readers[20];
+ int range_cnt = 200, i, limit = 10000;
+ unsigned long len = ULONG_MAX / range_cnt, start, end;
+ struct rcu_test_struct3 test = {.stop = false, .mt = mt};
+
+ start = range_cnt / 2 * len;
+ end = start + len - 1;
+ test.index = start;
+ test.last = end;
+
+ for (i = 0; i < range_cnt; i++) {
+ mtree_store_range(mt, i * len, i * len + len - 1,
+ xa_mk_value(i * 100), GFP_KERNEL);
+ }
+
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_in_rcu(mt));
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ if (pthread_create(&readers[i], NULL, rcu_slot_store_reader,
+ &test)) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ usleep(5);
+
+ while (limit--) {
+ /* Step by step, expand the most middle range to both sides. */
+ mtree_store_range(mt, --start, ++end, xa_mk_value(100),
+ GFP_KERNEL);
+ }
+
+ test.stop = true;
+
+ while (i--)
+ pthread_join(readers[i], NULL);
+
+ mt_validate(mt);
+}
+
static noinline
void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
{
@@ -35206,6 +35277,10 @@ static noinline void __init check_rcu_threaded(struct maple_tree *mt)
run_check_rcu(mt, &vals);
mtree_destroy(mt);
+ /* Check expanding range in RCU mode */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ run_check_rcu_slot_store(mt);
+ mtree_destroy(mt);
/* Forward writer for rcu stress */
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
@@ -35383,7 +35458,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
for (i = 0; i <= max; i++)
mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Spanning store */
+ mas_set_range(&mas, 470, 500);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
@@ -35392,105 +35469,108 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Slot store does not need allocations */
+ mas_set_range(&mas, 6, 9);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
- height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 0);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+
+ mas_set_range(&mas, 6, 10);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1);
mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Split */
+ mas_set_range(&mas, 54, 54);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + height * 2);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mt_set_non_kernel(1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0);
+ /* Spanning store */
+ mas_set_range(&mas, 1, 100);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated != 0);
mas_destroy(&mas);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Spanning store */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+ mas_set_range(&mas, 0, 200);
mt_set_non_kernel(1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated != 0);
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
index a1b139888048c9..511ac634eef0ed 100644
--- a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -15,7 +15,7 @@ long total_entries = 0;
#define ENTRY_CNT 32
struct perf_branch_entry entries[ENTRY_CNT] = {};
-static inline bool in_range(__u64 val)
+static inline bool gbs_in_range(__u64 val)
{
return (val >= address_low) && (val < address_high);
}
@@ -31,7 +31,7 @@ int BPF_PROG(test1, int n, int ret)
for (i = 0; i < ENTRY_CNT; i++) {
if (i >= total_entries)
break;
- if (in_range(entries[i].from) && in_range(entries[i].to))
+ if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to))
test1_hits++;
else if (!test1_hits)
wasted_entries++;
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index c4a57e69f749e3..4d556df4f77b62 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -5,4 +5,5 @@ test_freezer
test_kmem
test_kill
test_cpu
+test_zswap
wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 3d263747d2ad0b..27dbdd7bb4bb69 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
TEST_GEN_PROGS += test_kill
TEST_GEN_PROGS += test_cpu
+TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
@@ -23,3 +24,4 @@ $(OUTPUT)/test_core: cgroup_util.c
$(OUTPUT)/test_freezer: cgroup_util.c
$(OUTPUT)/test_kill: cgroup_util.c
$(OUTPUT)/test_cpu: cgroup_util.c
+$(OUTPUT)/test_zswap: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index ed2e50bb1e762f..c82f974b85c94d 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -162,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent,
* allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
* threads. Then it checks the sanity of numbers on the parent level:
* the total size of the cgroups should be roughly equal to
- * anon + file + slab + kernel_stack.
+ * anon + file + kernel + sock.
*/
static int test_kmem_memcg_deletion(const char *root)
{
- long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum;
+ long current, anon, file, kernel, sock, sum;
int ret = KSFT_FAIL;
char *parent;
@@ -184,29 +184,22 @@ static int test_kmem_memcg_deletion(const char *root)
goto cleanup;
current = cg_read_long(parent, "memory.current");
- slab = cg_read_key_long(parent, "memory.stat", "slab ");
anon = cg_read_key_long(parent, "memory.stat", "anon ");
file = cg_read_key_long(parent, "memory.stat", "file ");
- kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
- pagetables = cg_read_key_long(parent, "memory.stat", "pagetables ");
- percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+ kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
sock = cg_read_key_long(parent, "memory.stat", "sock ");
- if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
- kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0)
+ if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
goto cleanup;
- sum = slab + anon + file + kernel_stack + pagetables + percpu + sock;
+ sum = anon + file + kernel + sock;
if (abs(sum - current) < MAX_VMSTAT_ERROR) {
ret = KSFT_PASS;
} else {
printf("memory.current = %ld\n", current);
- printf("slab + anon + file + kernel_stack = %ld\n", sum);
- printf("slab = %ld\n", slab);
+ printf("anon + file + kernel + sock = %ld\n", sum);
printf("anon = %ld\n", anon);
printf("file = %ld\n", file);
- printf("kernel_stack = %ld\n", kernel_stack);
- printf("pagetables = %ld\n", pagetables);
- printf("percpu = %ld\n", percpu);
+ printf("kernel = %ld\n", kernel);
printf("sock = %ld\n", sock);
}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
new file mode 100644
index 00000000000000..49def87a909bdb
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/sysinfo.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+static int read_int(const char *path, size_t *value)
+{
+ FILE *file;
+ int ret = 0;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -1;
+ if (fscanf(file, "%ld", value) != 1)
+ ret = -1;
+ fclose(file);
+ return ret;
+}
+
+static int set_min_free_kb(size_t value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen("/proc/sys/vm/min_free_kbytes", "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int read_min_free_kb(size_t *value)
+{
+ return read_int("/proc/sys/vm/min_free_kbytes", value);
+}
+
+static int get_zswap_stored_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/stored_pages", value);
+}
+
+static int get_zswap_written_back_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/written_back_pages", value);
+}
+
+static int allocate_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+ free(mem);
+ return 0;
+}
+
+/*
+ * When trying to store a memcg page in zswap, if the memcg hits its memory
+ * limit in zswap, writeback should not be triggered.
+ *
+ * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
+ * not zswap"). Needs to be revised when a per memcg writeback mechanism is
+ * implemented.
+ */
+static int test_no_invasive_cgroup_shrink(const char *root)
+{
+ size_t written_back_before, written_back_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ /* Set up */
+ test_group = cg_name(root, "no_shrink_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "1M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "10K"))
+ goto out;
+ if (get_zswap_written_back_pages(&written_back_before))
+ goto out;
+
+ /* Allocate 10x memory.max to push memory into zswap */
+ if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
+ goto out;
+
+ /* Verify that no writeback happened because of the memcg allocation */
+ if (get_zswap_written_back_pages(&written_back_after))
+ goto out;
+ if (written_back_after == written_back_before)
+ ret = KSFT_PASS;
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+struct no_kmem_bypass_child_args {
+ size_t target_alloc_bytes;
+ size_t child_allocated;
+};
+
+static int no_kmem_bypass_child(const char *cgroup, void *arg)
+{
+ struct no_kmem_bypass_child_args *values = arg;
+ void *allocation;
+
+ allocation = malloc(values->target_alloc_bytes);
+ if (!allocation) {
+ values->child_allocated = true;
+ return -1;
+ }
+ for (long i = 0; i < values->target_alloc_bytes; i += 4095)
+ ((char *)allocation)[i] = 'a';
+ values->child_allocated = true;
+ pause();
+ free(allocation);
+ return 0;
+}
+
+/*
+ * When pages owned by a memcg are pushed to zswap by kswapd, they should be
+ * charged to that cgroup. This wasn't the case before commit
+ * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
+ *
+ * The test first allocates memory in a memcg, then raises min_free_kbytes to
+ * a very high value so that the allocation falls below low wm, then makes
+ * another allocation to trigger kswapd that should push the memcg-owned pages
+ * to zswap and verifies that the zswap pages are correctly charged.
+ *
+ * To be run on a VM with at most 4G of memory.
+ */
+static int test_no_kmem_bypass(const char *root)
+{
+ size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
+ struct no_kmem_bypass_child_args *values;
+ size_t trigger_allocation_size;
+ int wait_child_iteration = 0;
+ long stored_pages_threshold;
+ struct sysinfo sys_info;
+ int ret = KSFT_FAIL;
+ int child_status;
+ char *test_group;
+ pid_t child_pid;
+
+ /* Read sys info and compute test values accordingly */
+ if (sysinfo(&sys_info) != 0)
+ return KSFT_FAIL;
+ if (sys_info.totalram > 5000000000)
+ return KSFT_SKIP;
+ values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
+ PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (values == MAP_FAILED)
+ return KSFT_FAIL;
+ if (read_min_free_kb(&min_free_kb_original))
+ return KSFT_FAIL;
+ min_free_kb_high = sys_info.totalram / 2000;
+ min_free_kb_low = sys_info.totalram / 500000;
+ values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
+ sys_info.totalram * 5 / 100;
+ stored_pages_threshold = sys_info.totalram / 5 / 4096;
+ trigger_allocation_size = sys_info.totalram / 20;
+
+ /* Set up test memcg */
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ goto out;
+ test_group = cg_name(root, "kmem_bypass_test");
+ if (!test_group)
+ goto out;
+
+ /* Spawn memcg child and wait for it to allocate */
+ set_min_free_kb(min_free_kb_low);
+ if (cg_create(test_group))
+ goto out;
+ values->child_allocated = false;
+ child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
+ if (child_pid < 0)
+ goto out;
+ while (!values->child_allocated && wait_child_iteration++ < 10000)
+ usleep(1000);
+
+ /* Try to wakeup kswapd and let it push child memory to zswap */
+ set_min_free_kb(min_free_kb_high);
+ for (int i = 0; i < 20; i++) {
+ size_t stored_pages;
+ char *trigger_allocation = malloc(trigger_allocation_size);
+
+ if (!trigger_allocation)
+ break;
+ for (int i = 0; i < trigger_allocation_size; i += 4095)
+ trigger_allocation[i] = 'b';
+ usleep(100000);
+ free(trigger_allocation);
+ if (get_zswap_stored_pages(&stored_pages))
+ break;
+ if (stored_pages < 0)
+ break;
+ /* If memory was pushed to zswap, verify it belongs to memcg */
+ if (stored_pages > stored_pages_threshold) {
+ int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
+ int delta = stored_pages * 4096 - zswapped;
+ int result_ok = delta < stored_pages * 4096 / 4;
+
+ ret = result_ok ? KSFT_PASS : KSFT_FAIL;
+ break;
+ }
+ }
+
+ kill(child_pid, SIGTERM);
+ waitpid(child_pid, &child_status, 0);
+out:
+ set_min_free_kb(min_free_kb_original);
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct zswap_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_no_kmem_bypass),
+ T(test_no_invasive_cgroup_shrink),
+};
+#undef T
+
+static bool zswap_configured(void)
+{
+ return access("/sys/module/zswap", F_OK) == 0;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (!zswap_configured())
+ ksft_exit_skip("zswap isn't configured\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index bcd4734ca09432..60a9a305aef071 100644
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -84,6 +84,7 @@ test_tried_regions()
{
tried_regions_dir=$1
ensure_dir "$tried_regions_dir" "exist"
+ ensure_file "$tried_regions_dir/total_bytes" "exist" "400"
}
test_stats()
@@ -102,9 +103,14 @@ test_filter()
ensure_file "$filter_dir/type" "exist" "600"
ensure_write_succ "$filter_dir/type" "anon" "valid input"
ensure_write_succ "$filter_dir/type" "memcg" "valid input"
+ ensure_write_succ "$filter_dir/type" "addr" "valid input"
+ ensure_write_succ "$filter_dir/type" "target" "valid input"
ensure_write_fail "$filter_dir/type" "foo" "invalid input"
ensure_file "$filter_dir/matching" "exist" "600"
ensure_file "$filter_dir/memcg_path" "exist" "600"
+ ensure_file "$filter_dir/addr_start" "exist" "600"
+ ensure_file "$filter_dir/addr_end" "exist" "600"
+ ensure_file "$filter_dir/damon_target_idx" "exist" "600"
}
test_filters()
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 829be379545ada..529d29a359002c 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -113,6 +113,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
+ /*
+ * Force line buffering; If stdout is not connected to a terminal, it
+ * will otherwise default to fully buffered, which can cause output
+ * duplication if there is content in the buffer when fork()ing. If
+ * there is a crash, line buffering also means the most recent output
+ * line will be visible.
+ */
+ setvbuf(stdout, NULL, _IOLBF, 0);
+
if (!(getenv("KSFT_TAP_LEVEL")))
printf("TAP version 13\n");
}
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 1c952d1401d46b..261c73cab41b1d 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -105,15 +105,18 @@ run_one()
echo "# Warning: file $TEST is missing!"
echo "not ok $test_num $TEST_HDR_MSG"
else
+ if [ -x /usr/bin/stdbuf ]; then
+ stdbuf="/usr/bin/stdbuf --output=L "
+ fi
eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
- cmd="./$BASENAME_TEST $kselftest_cmd_args"
+ cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
if [ ! -x "$TEST" ]; then
echo "# Warning: file $TEST is not executable"
if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
then
interpreter=$(head -n 1 "$TEST" | cut -c 3-)
- cmd="$interpreter ./$BASENAME_TEST"
+ cmd="$stdbuf $interpreter ./$BASENAME_TEST"
else
echo "not ok $test_num $TEST_HDR_MSG"
return
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index dba0e8ba002f88..3df00867723910 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -18,6 +18,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <ctype.h>
#include "common.h"
@@ -43,7 +44,6 @@
*/
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
-static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *));
static int newpid_thread_fn2(void *arg);
static void join_newpid_thread(pid_t pid);
@@ -96,12 +96,12 @@ static void sysctl_assert_write(const char *val)
int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
if (fd < 0) {
- printf("open sysctl failed\n");
+ printf("open sysctl failed: %m\n");
abort();
}
if (write(fd, val, strlen(val)) < 0) {
- printf("write sysctl failed\n");
+ printf("write sysctl %s failed: %m\n", val);
abort();
}
}
@@ -111,7 +111,7 @@ static void sysctl_fail_write(const char *val)
int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
if (fd < 0) {
- printf("open sysctl failed\n");
+ printf("open sysctl failed: %m\n");
abort();
}
@@ -122,6 +122,33 @@ static void sysctl_fail_write(const char *val)
}
}
+static void sysctl_assert_equal(const char *val)
+{
+ char *p, buf[128] = {};
+ int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (read(fd, buf, sizeof(buf)) < 0) {
+ printf("read sysctl failed: %m\n");
+ abort();
+ }
+
+ /* Strip trailing whitespace. */
+ p = buf;
+ while (!isspace(*p))
+ p++;
+ *p = '\0';
+
+ if (strcmp(buf, val) != 0) {
+ printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
+ abort();
+ }
+}
+
static int mfd_assert_reopen_fd(int fd_in)
{
int fd;
@@ -736,7 +763,7 @@ static int idle_thread_fn(void *arg)
return 0;
}
-static pid_t spawn_idle_thread(unsigned int flags)
+static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
{
uint8_t *stack;
pid_t pid;
@@ -747,10 +774,7 @@ static pid_t spawn_idle_thread(unsigned int flags)
abort();
}
- pid = clone(idle_thread_fn,
- stack + STACK_SIZE,
- SIGCHLD | flags,
- NULL);
+ pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
if (pid < 0) {
printf("clone() failed: %m\n");
abort();
@@ -759,6 +783,33 @@ static pid_t spawn_idle_thread(unsigned int flags)
return pid;
}
+static void join_thread(pid_t pid)
+{
+ int wstatus;
+
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ printf("newpid thread: waitpid() failed: %m\n");
+ abort();
+ }
+
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+ printf("newpid thread: exited with non-zero error code %d\n",
+ WEXITSTATUS(wstatus));
+ abort();
+ }
+
+ if (WIFSIGNALED(wstatus)) {
+ printf("newpid thread: killed by signal %d\n",
+ WTERMSIG(wstatus));
+ abort();
+ }
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ return spawn_thread(flags, idle_thread_fn, NULL);
+}
+
static void join_idle_thread(pid_t pid)
{
kill(pid, SIGTERM);
@@ -1111,109 +1162,260 @@ static void test_noexec_seal(void)
close(fd);
}
-static void test_sysctl_child(void)
+static void test_sysctl_sysctl0(void)
{
int fd;
- int pid;
- printf("%s sysctl 0\n", memfd_str);
- sysctl_assert_write("0");
- fd = mfd_assert_new("kern_memfd_sysctl_0",
+ sysctl_assert_equal("0");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
mfd_def_size,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
-
mfd_assert_mode(fd, 0777);
mfd_assert_has_seals(fd, 0);
mfd_assert_chmod(fd, 0644);
close(fd);
+}
- printf("%s sysctl 1\n", memfd_str);
- sysctl_assert_write("1");
- fd = mfd_assert_new("kern_memfd_sysctl_1",
+static void test_sysctl_set_sysctl0(void)
+{
+ sysctl_assert_write("0");
+ test_sysctl_sysctl0();
+}
+
+static void test_sysctl_sysctl1(void)
+{
+ int fd;
+
+ sysctl_assert_equal("1");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
mfd_def_size,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
- printf("%s child ns\n", memfd_str);
- pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2);
- join_newpid_thread(pid);
+ fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
+ fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
mfd_assert_mode(fd, 0666);
mfd_assert_has_seals(fd, F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
- sysctl_fail_write("0");
close(fd);
-
- printf("%s sysctl 2\n", memfd_str);
- sysctl_assert_write("2");
- mfd_fail_new("kern_memfd_sysctl_2",
- MFD_CLOEXEC | MFD_ALLOW_SEALING);
- sysctl_fail_write("0");
- sysctl_fail_write("1");
}
-static int newpid_thread_fn(void *arg)
+static void test_sysctl_set_sysctl1(void)
{
- test_sysctl_child();
- return 0;
+ sysctl_assert_write("1");
+ test_sysctl_sysctl1();
}
-static void test_sysctl_child2(void)
+static void test_sysctl_sysctl2(void)
{
int fd;
- sysctl_fail_write("0");
- fd = mfd_assert_new("kern_memfd_sysctl_1",
+ sysctl_assert_equal("2");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
mfd_def_size,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
mfd_assert_mode(fd, 0666);
mfd_assert_has_seals(fd, F_SEAL_EXEC);
mfd_fail_chmod(fd, 0777);
close(fd);
}
-static int newpid_thread_fn2(void *arg)
+static void test_sysctl_set_sysctl2(void)
+{
+ sysctl_assert_write("2");
+ test_sysctl_sysctl2();
+}
+
+static int sysctl_simple_child(void *arg)
+{
+ int fd;
+ int pid;
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 2\n", memfd_str);
+ test_sysctl_set_sysctl2();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ return 0;
+}
+
+/*
+ * Test sysctl
+ * A very basic test to make sure the core sysctl semantics work.
+ */
+static void test_sysctl_simple(void)
+{
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+
+ join_thread(pid);
+}
+
+static int sysctl_nested(void *arg)
{
- test_sysctl_child2();
+ void (*fn)(void) = arg;
+
+ fn();
return 0;
}
-static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *))
+
+static int sysctl_nested_wait(void *arg)
{
- uint8_t *stack;
- pid_t pid;
+ /* Wait for a SIGCONT. */
+ kill(getpid(), SIGSTOP);
+ return sysctl_nested(arg);
+}
- stack = malloc(STACK_SIZE);
- if (!stack) {
- printf("malloc(STACK_SIZE) failed: %m\n");
- abort();
- }
+static void test_sysctl_sysctl1_failset(void)
+{
+ sysctl_fail_write("0");
+ test_sysctl_sysctl1();
+}
- pid = clone(fn,
- stack + STACK_SIZE,
- SIGCHLD | flags,
- NULL);
- if (pid < 0) {
- printf("clone() failed: %m\n");
- abort();
- }
+static void test_sysctl_sysctl2_failset(void)
+{
+ sysctl_fail_write("1");
+ test_sysctl_sysctl2();
- return pid;
+ sysctl_fail_write("0");
+ test_sysctl_sysctl2();
}
-static void join_newpid_thread(pid_t pid)
+static int sysctl_nested_child(void *arg)
{
- waitpid(pid, NULL, 0);
+ int fd;
+ int pid;
+
+ printf("%s nested sysctl 0\n", memfd_str);
+ sysctl_assert_write("0");
+ /* A further nested pidns works the same. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1\n", memfd_str);
+ sysctl_assert_write("1");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl1_failset);
+ join_thread(pid);
+ /* Child can lower the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_set_sysctl2);
+ join_thread(pid);
+ /* Child lowering the setting has no effect on our setting. */
+ test_sysctl_sysctl1();
+
+ printf("%s nested sysctl 2\n", memfd_str);
+ sysctl_assert_write("2");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl2_failset);
+ join_thread(pid);
+
+ /* Verify that the rules are actually inherited after fork. */
+ printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1_failset);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2_failset);
+ sysctl_assert_write("2");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ /*
+ * Verify that the current effective setting is saved on fork, meaning
+ * that the parent lowering the sysctl doesn't affect already-forked
+ * children.
+ */
+ printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("1");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ return 0;
}
/*
- * Test sysctl
- * A very basic sealing test to see whether setting/retrieving seals works.
+ * Test sysctl with nested pid namespaces
+ * Make sure that the sysctl nesting semantics work correctly.
*/
-static void test_sysctl(void)
+static void test_sysctl_nested(void)
{
- int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn);
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
- join_newpid_thread(pid);
+ join_thread(pid);
}
/*
@@ -1399,6 +1601,9 @@ int main(int argc, char **argv)
test_seal_grow();
test_seal_resize();
+ test_sysctl_simple();
+ test_sysctl_nested();
+
test_share_dup("SHARE-DUP", "");
test_share_mmap("SHARE-MMAP", "");
test_share_open("SHARE-OPEN", "");
@@ -1413,8 +1618,6 @@ int main(int argc, char **argv)
test_share_fork("SHARE-FORK", SHARED_FT_STR);
join_idle_thread(pid);
- test_sysctl();
-
printf("memfd: DONE\n");
return 0;
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 7e2a982383c00e..cdc9ce4426b95e 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -5,6 +5,7 @@ hugepage-mremap
hugepage-shm
hugepage-vmemmap
hugetlb-madvise
+hugetlb-read-hwpoison
khugepaged
map_hugetlb
map_populate
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 66d7c07dc1773b..6a9fc5693145f3 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -35,39 +35,43 @@ MAKEFLAGS += --no-builtin-rules
CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
LDLIBS = -lrt -lpthread
-TEST_GEN_PROGS = cow
-TEST_GEN_PROGS += compaction_test
-TEST_GEN_PROGS += gup_longterm
-TEST_GEN_PROGS += gup_test
-TEST_GEN_PROGS += hmm-tests
-TEST_GEN_PROGS += hugetlb-madvise
-TEST_GEN_PROGS += hugepage-mmap
-TEST_GEN_PROGS += hugepage-mremap
-TEST_GEN_PROGS += hugepage-shm
-TEST_GEN_PROGS += hugepage-vmemmap
-TEST_GEN_PROGS += khugepaged
-TEST_GEN_PROGS += madv_populate
-TEST_GEN_PROGS += map_fixed_noreplace
-TEST_GEN_PROGS += map_hugetlb
-TEST_GEN_PROGS += map_populate
-TEST_GEN_PROGS += memfd_secret
-TEST_GEN_PROGS += migration
-TEST_GEN_PROGS += mkdirty
-TEST_GEN_PROGS += mlock-random-test
-TEST_GEN_PROGS += mlock2-tests
-TEST_GEN_PROGS += mrelease_test
-TEST_GEN_PROGS += mremap_dontunmap
-TEST_GEN_PROGS += mremap_test
-TEST_GEN_PROGS += on-fault-limit
-TEST_GEN_PROGS += thuge-gen
-TEST_GEN_PROGS += transhuge-stress
-TEST_GEN_PROGS += uffd-stress
-TEST_GEN_PROGS += uffd-unit-tests
+TEST_GEN_FILES = cow
+TEST_GEN_FILES += compaction_test
+TEST_GEN_FILES += gup_longterm
+TEST_GEN_FILES += gup_test
+TEST_GEN_FILES += hmm-tests
+TEST_GEN_FILES += hugetlb-madvise
+TEST_GEN_FILES += hugetlb-read-hwpoison
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-mremap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
+TEST_GEN_FILES += khugepaged
+TEST_GEN_FILES += madv_populate
+TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += memfd_secret
+TEST_GEN_FILES += migration
+TEST_GEN_FILES += mkdirty
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
+TEST_GEN_FILES += mremap_dontunmap
+TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += uffd-stress
+TEST_GEN_FILES += uffd-unit-tests
+TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
+TEST_GEN_FILES += ksm_functional_tests
+TEST_GEN_FILES += mdwe_test
+
+ifneq ($(ARCH),arm64)
TEST_GEN_PROGS += soft-dirty
-TEST_GEN_PROGS += split_huge_page_test
-TEST_GEN_PROGS += ksm_tests
-TEST_GEN_PROGS += ksm_functional_tests
-TEST_GEN_PROGS += mdwe_test
+endif
ifeq ($(ARCH),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
@@ -83,24 +87,24 @@ CFLAGS += -no-pie
endif
ifeq ($(CAN_BUILD_I386),1)
-TEST_GEN_PROGS += $(BINARIES_32)
+TEST_GEN_FILES += $(BINARIES_32)
endif
ifeq ($(CAN_BUILD_X86_64),1)
-TEST_GEN_PROGS += $(BINARIES_64)
+TEST_GEN_FILES += $(BINARIES_64)
endif
else
ifneq (,$(findstring $(ARCH),ppc64))
-TEST_GEN_PROGS += protection_keys
+TEST_GEN_FILES += protection_keys
endif
endif
ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
-TEST_GEN_PROGS += va_high_addr_switch
-TEST_GEN_PROGS += virtual_address_range
-TEST_GEN_PROGS += write_to_hugetlbfs
+TEST_GEN_FILES += va_high_addr_switch
+TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += write_to_hugetlbfs
endif
TEST_PROGS := run_vmtests.sh
@@ -112,6 +116,7 @@ TEST_FILES += va_high_addr_switch.sh
include ../lib.mk
$(TEST_GEN_PROGS): vm_util.c
+$(TEST_GEN_FILES): vm_util.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
new file mode 100644
index 00000000000000..ba6cc6f9cabcdf
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/magic.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#define PREFIX " ... "
+#define ERROR_PREFIX " !!! "
+
+#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16)
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+enum test_status {
+ TEST_PASSED = 0,
+ TEST_FAILED = 1,
+ TEST_SKIPPED = 2,
+};
+
+static char *status_to_str(enum test_status status)
+{
+ switch (status) {
+ case TEST_PASSED:
+ return "TEST_PASSED";
+ case TEST_FAILED:
+ return "TEST_FAILED";
+ case TEST_SKIPPED:
+ return "TEST_SKIPPED";
+ default:
+ return "TEST_???";
+ }
+}
+
+static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size)
+{
+ char iter = 0;
+
+ for (size_t offset = 0; offset < len;
+ offset += wr_chunk_size) {
+ iter++;
+ memset(filemap + offset, iter, wr_chunk_size);
+ }
+
+ return 0;
+}
+
+static bool verify_chunk(char *buf, size_t len, char val)
+{
+ size_t i;
+
+ for (i = 0; i < len; ++i) {
+ if (buf[i] != val) {
+ printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n",
+ i, buf[i], val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size,
+ off_t offset, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = offset / wr_chunk_size + offset % wr_chunk_size;
+
+ printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset);
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ if (lseek(fd, offset, SEEK_SET) < 0) {
+ perror(PREFIX ERROR_PREFIX "seek failed");
+ return false;
+ }
+
+ while (offset + total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static bool read_hugepage_filemap(int fd, size_t len,
+ size_t wr_chunk_size, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = 0;
+
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ while (total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static enum test_status
+test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ if (read_hugepage_filemap(fd, len, wr_chunk_size, len))
+ status = TEST_PASSED;
+
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static enum test_status
+test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size,
+ bool skip_hwpoison_page)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+ char *hwp_addr = NULL;
+ const unsigned long pagesize = getpagesize();
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ /*
+ * Poisoned hugetlb page layout (assume hugepagesize=2MB):
+ * |<---------------------- 1MB ---------------------->|
+ * |<---- healthy page ---->|<---- HWPOISON page ----->|
+ * |<------------------- (1MB - 8KB) ----------------->|
+ */
+ hwp_addr = filemap + len / 2 + pagesize;
+ if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) {
+ perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed");
+ goto unmap;
+ }
+
+ if (!skip_hwpoison_page) {
+ /*
+ * Userspace should be able to read (1MB + 1 page) from
+ * the beginning of the HWPOISONed hugepage.
+ */
+ if (read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + pagesize))
+ status = TEST_PASSED;
+ } else {
+ /*
+ * Userspace should be able to read (1MB - 2 pages) from
+ * HWPOISONed hugepage.
+ */
+ if (seek_read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + MAX(2 * pagesize, wr_chunk_size),
+ len / 2 - MAX(2 * pagesize, wr_chunk_size)))
+ status = TEST_PASSED;
+ }
+
+unmap:
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static int create_hugetlbfs_file(struct statfs *file_stat)
+{
+ int fd;
+
+ fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
+ if (fd < 0) {
+ perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file");
+ return -1;
+ }
+
+ memset(file_stat, 0, sizeof(*file_stat));
+ if (fstatfs(fd, file_stat)) {
+ perror(PREFIX ERROR_PREFIX "fstatfs failed");
+ goto close;
+ }
+ if (file_stat->f_type != HUGETLBFS_MAGIC) {
+ printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n");
+ goto close;
+ }
+
+ return fd;
+close:
+ close(fd);
+ return -1;
+}
+
+int main(void)
+{
+ int fd;
+ struct statfs file_stat;
+ enum test_status status;
+ /* Test read() in different granularity. */
+ size_t wr_chunk_sizes[] = {
+ getpagesize() / 2, getpagesize(),
+ getpagesize() * 2, getpagesize() * 4
+ };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) {
+ printf("Write/read chunk size=0x%lx\n",
+ wr_chunk_sizes[i]);
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read regression test...\n");
+ status = test_hugetlb_read(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i]);
+ printf(PREFIX "HugeTLB read regression test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], false);
+ printf(PREFIX "HugeTLB read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], true);
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+ }
+
+ return 0;
+
+create_failure:
+ printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n");
+ return -1;
+}
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 26853badae7056..901e950f9138c7 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -27,8 +27,12 @@
#define KiB 1024u
#define MiB (1024 * KiB)
+static int mem_fd;
static int ksm_fd;
static int ksm_full_scans_fd;
+static int proc_self_ksm_stat_fd;
+static int proc_self_ksm_merging_pages_fd;
+static int ksm_use_zero_pages_fd;
static int pagemap_fd;
static size_t pagesize;
@@ -59,6 +63,49 @@ static bool range_maps_duplicates(char *addr, unsigned long size)
return false;
}
+static long get_my_ksm_zero_pages(void)
+{
+ char buf[200];
+ char *substr_ksm_zero;
+ size_t value_pos;
+ ssize_t read_size;
+ unsigned long my_ksm_zero_pages;
+
+ if (!proc_self_ksm_stat_fd)
+ return 0;
+
+ read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
+ if (read_size < 0)
+ return -errno;
+
+ buf[read_size] = 0;
+
+ substr_ksm_zero = strstr(buf, "ksm_zero_pages");
+ if (!substr_ksm_zero)
+ return 0;
+
+ value_pos = strcspn(substr_ksm_zero, "0123456789");
+ my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
+
+ return my_ksm_zero_pages;
+}
+
+static long get_my_merging_pages(void)
+{
+ char buf[10];
+ ssize_t ret;
+
+ if (proc_self_ksm_merging_pages_fd < 0)
+ return proc_self_ksm_merging_pages_fd;
+
+ ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
static long ksm_get_full_scans(void)
{
char buf[10];
@@ -91,11 +138,30 @@ static int ksm_merge(void)
return 0;
}
-static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl)
+static int ksm_unmerge(void)
+{
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+ return 0;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ bool use_prctl)
{
char *map;
int ret;
+ /* Stabilize accounting by disabling KSM completely. */
+ if (ksm_unmerge()) {
+ ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
+ goto unmap;
+ }
+
+ if (get_my_merging_pages() > 0) {
+ ksft_test_result_fail("Still pages merged\n");
+ goto unmap;
+ }
+
map = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (map == MAP_FAILED) {
@@ -112,6 +178,11 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl)
/* Make sure each page contains the same values to merge them. */
memset(map, val, size);
+ if (mprotect(map, size, prot)) {
+ ksft_test_result_skip("mprotect() failed\n");
+ goto unmap;
+ }
+
if (use_prctl) {
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
@@ -131,6 +202,16 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl)
ksft_test_result_fail("Running KSM failed\n");
goto unmap;
}
+
+ /*
+ * Check if anything was merged at all. Ignore the zero page that is
+ * accounted differently (depending on kernel support).
+ */
+ if (val && !get_my_merging_pages()) {
+ ksft_test_result_fail("No pages got merged\n");
+ goto unmap;
+ }
+
return map;
unmap:
munmap(map, size);
@@ -144,7 +225,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
if (map == MAP_FAILED)
return;
@@ -159,6 +240,70 @@ unmap:
munmap(map, size);
}
+static void test_unmerge_zero_pages(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ unsigned int offs;
+ unsigned long pages_expected;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ if (proc_self_ksm_stat_fd < 0) {
+ ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
+ return;
+ }
+ if (ksm_use_zero_pages_fd < 0) {
+ ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ return;
+ }
+ if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
+ ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ return;
+ }
+
+ /* Let KSM deduplicate zero pages. */
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
+ if (map == MAP_FAILED)
+ return;
+
+ /* Check if ksm_zero_pages is updated correctly after KSM merging */
+ pages_expected = size / pagesize;
+ if (pages_expected != get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
+ goto unmap;
+ }
+
+ /* Try to unmerge half of the region */
+ if (madvise(map, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ /* Check if ksm_zero_pages is updated correctly after unmerging */
+ pages_expected /= 2;
+ if (pages_expected != get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
+ goto unmap;
+ }
+
+ /* Trigger unmerging of the other half by writing to the pages. */
+ for (offs = size / 2; offs < size; offs += pagesize)
+ *((unsigned int *)&map[offs]) = offs;
+
+ /* Now we should have no zeropages remaining. */
+ if (get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
+ goto unmap;
+ }
+
+ /* Check if ksm zero pages are really unmerged */
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "KSM zero pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
static void test_unmerge_discarded(void)
{
const unsigned int size = 2 * MiB;
@@ -166,7 +311,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
if (map == MAP_FAILED)
return;
@@ -198,7 +343,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
if (map == MAP_FAILED)
return;
@@ -341,7 +486,7 @@ static void test_prctl_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, true);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
if (map == MAP_FAILED)
return;
@@ -356,9 +501,42 @@ unmap:
munmap(map, size);
}
+static void test_prot_none(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ int i;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
+ if (map == MAP_FAILED)
+ goto unmap;
+
+ /* Store a unique value in each page on one half using ptrace */
+ for (i = 0; i < size / 2; i += pagesize) {
+ lseek(mem_fd, (uintptr_t) map + i, SEEK_SET);
+ if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) {
+ ksft_test_result_fail("ptrace write failed\n");
+ goto unmap;
+ }
+ }
+
+ /* Trigger unsharing on the other half. */
+ if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
int main(int argc, char **argv)
{
- unsigned int tests = 5;
+ unsigned int tests = 7;
int err;
#ifdef __NR_userfaultfd
@@ -370,6 +548,9 @@ int main(int argc, char **argv)
pagesize = getpagesize();
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
if (ksm_fd < 0)
ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
@@ -379,13 +560,20 @@ int main(int argc, char **argv)
pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
if (pagemap_fd < 0)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
test_unmerge();
+ test_unmerge_zero_pages();
test_unmerge_discarded();
#ifdef __NR_userfaultfd
test_unmerge_uffd_wp();
#endif
+ test_prot_none();
+
test_prctl();
test_prctl_fork();
test_prctl_unmerge();
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index 60547245e479b1..17bcb07f19f349 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -264,14 +264,35 @@ static void test_softdirty(void)
munmap(addr, SIZE);
}
+static int system_has_softdirty(void)
+{
+ /*
+ * There is no way to check if the kernel supports soft-dirty, other
+ * than by writing to a page and seeing if the bit was set. But the
+ * tests are intended to check that the bit gets set when it should, so
+ * doing that check would turn a potentially legitimate fail into a
+ * skip. Fortunately, we know for sure that arm64 does not support
+ * soft-dirty. So for now, let's just use the arch as a corse guide.
+ */
+#if defined(__aarch64__)
+ return 0;
+#else
+ return 1;
+#endif
+}
+
int main(int argc, char **argv)
{
+ int nr_tests = 16;
int err;
pagesize = getpagesize();
+ if (system_has_softdirty())
+ nr_tests += 5;
+
ksft_print_header();
- ksft_set_plan(21);
+ ksft_set_plan(nr_tests);
sense_support();
test_prot_read();
@@ -279,7 +300,8 @@ int main(int argc, char **argv)
test_holes();
test_populate_read();
test_populate_write();
- test_softdirty();
+ if (system_has_softdirty())
+ test_softdirty();
err = ksft_get_fail_cnt();
if (err)
diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 240f2d9dae7a99..7945d07548751b 100644
--- a/tools/testing/selftests/mm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -77,7 +77,7 @@ int main(int argc, char **argv)
unsigned long *smap;
ftmp = tmpfile();
- BUG_ON(ftmp == 0, "tmpfile()");
+ BUG_ON(!ftmp, "tmpfile()");
ret = ftruncate(fileno(ftmp), MMAP_SZ);
BUG_ON(ret, "ftruncate()");
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 379581567f272f..6908569ef4065d 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -10,12 +10,13 @@
#include <numa.h>
#include <numaif.h>
#include <sys/mman.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include <signal.h>
#include <time.h>
#define TWOMEG (2<<20)
-#define RUNTIME (60)
+#define RUNTIME (20)
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
@@ -155,10 +156,15 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
memset(ptr, 0xde, TWOMEG);
for (i = 0; i < self->nthreads - 1; i++) {
pid = fork();
- if (!pid)
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
access_mem(ptr);
- else
+ } else {
self->pids[i] = pid;
+ }
}
ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c
index dca21042b67927..d822004a374e9d 100644
--- a/tools/testing/selftests/mm/mrelease_test.c
+++ b/tools/testing/selftests/mm/mrelease_test.c
@@ -7,6 +7,7 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <asm-generic/unistd.h>
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 3f26f6e15b2a49..3e2bc818d566f6 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -12,11 +12,14 @@ exitcode=0
usage() {
cat <<EOF
-usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"]
+usage: ${BASH_SOURCE[0]:-$0} [ options ]
+
+ -a: run all tests, including extra ones
-t: specify specific categories to tests to run
-h: display this message
-The default behavior is to run all tests.
+The default behavior is to run required tests only. If -a is specified,
+will run all tests.
Alternatively, specific groups tests can be run by passing a string
to the -t argument containing one or more of the following categories
@@ -55,14 +58,27 @@ separated by spaces:
test soft dirty page bit semantics
- cow
test copy-on-write semantics
+- thp
+ test transparent huge pages
+- migration
+ invoke move_pages(2) to exercise the migration entry code
+ paths in the kernel
+- mkdirty
+ test handling of code that might set PTE/PMD dirty in
+ read-only VMAs
+- mdwe
+ test prctl(PR_SET_MDWE, ...)
+
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
exit 0
}
+RUN_ALL=false
-while getopts "ht:" OPT; do
+while getopts "aht:" OPT; do
case ${OPT} in
+ "a") RUN_ALL=true ;;
"h") usage ;;
"t") VM_SELFTEST_ITEMS=${OPTARG} ;;
esac
@@ -85,6 +101,30 @@ test_selected() {
fi
}
+run_gup_matrix() {
+ # -t: thp=on, -T: thp=off, -H: hugetlb=on
+ local hugetlb_mb=$(( needmem_KB / 1024 ))
+
+ for huge in -t -T "-H -m $hugetlb_mb"; do
+ # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm
+ for test_cmd in -u -U -a -b -L; do
+ # -w: write=1, -W: write=0
+ for write in -w -W; do
+ # -S: shared
+ for share in -S " "; do
+ # -n: How many pages to fetch together? 512 is special
+ # because it's default thp size (or 2M on x86), 123 to
+ # just test partial gup when hit a huge in whatever form
+ for num in "-n 1" "-n 512" "-n 123"; do
+ CATEGORY="gup_test" run_test ./gup_test \
+ $huge $test_cmd $write $share $num
+ done
+ done
+ done
+ done
+ done
+}
+
# get huge pagesize and freepages from /proc/meminfo
while read -r name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
@@ -189,13 +229,16 @@ fi
CATEGORY="mmap" run_test ./map_fixed_noreplace
-# get_user_pages_fast() benchmark
-CATEGORY="gup_test" run_test ./gup_test -u
-# pin_user_pages_fast() benchmark
-CATEGORY="gup_test" run_test ./gup_test -a
+if $RUN_ALL; then
+ run_gup_matrix
+else
+ # get_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -u
+ # pin_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -a
+fi
# Dump pages 0, 19, and 4096, using pin_user_pages:
CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
-
CATEGORY="gup_test" run_test ./gup_longterm
CATEGORY="userfaultfd" run_test ./uffd-unit-tests
@@ -262,6 +305,10 @@ CATEGORY="madv_populate" run_test ./madv_populate
CATEGORY="memfd_secret" run_test ./memfd_secret
+# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
+CATEGORY="ksm" run_test ./ksm_tests -H -s 100
+# KSM KSM_MERGE_TIME test with size of 100
+CATEGORY="ksm" run_test ./ksm_tests -P -s 100
# KSM MADV_MERGEABLE test with 10 identical pages
CATEGORY="ksm" run_test ./ksm_tests -M -p 10
# KSM unmerge test
@@ -290,11 +337,26 @@ then
CATEGORY="pkey" run_test ./protection_keys_64
fi
-CATEGORY="soft_dirty" run_test ./soft-dirty
+if [ -x ./soft-dirty ]
+then
+ CATEGORY="soft_dirty" run_test ./soft-dirty
+fi
# COW tests
CATEGORY="cow" run_test ./cow
+CATEGORY="thp" run_test ./khugepaged
+
+CATEGORY="thp" run_test ./transhuge-stress -d 20
+
+CATEGORY="thp" run_test ./split_huge_page_test
+
+CATEGORY="migration" run_test ./migration
+
+CATEGORY="mkdirty" run_test ./mkdirty
+
+CATEGORY="mdwe" run_test ./mdwe_test
+
echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}"
exit $exitcode
diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings
index 9abfc60e9e6f49..a953c96aa16e1e 100644
--- a/tools/testing/selftests/mm/settings
+++ b/tools/testing/selftests/mm/settings
@@ -1 +1 @@
-timeout=45
+timeout=180
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 380ab5f0a5340e..16ed4dfa735983 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -139,7 +139,7 @@ void test_mmap(unsigned long size, unsigned flags)
before, after, before - after, size);
assert(size == getpagesize() || (before - after) == NUM_PAGES);
show(size);
- err = munmap(map, size);
+ err = munmap(map, size * NUM_PAGES);
assert(!err);
}
@@ -222,7 +222,7 @@ int main(void)
test_mmap(ps, MAP_HUGETLB | arg);
}
printf("Testing default huge mmap\n");
- test_mmap(default_hps, SHM_HUGETLB);
+ test_mmap(default_hps, MAP_HUGETLB);
puts("Testing non-huge shmget");
test_shmget(getpagesize(), 0);
diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c
index ba9d37ad3a8957..c61fb9350b8c21 100644
--- a/tools/testing/selftests/mm/transhuge-stress.c
+++ b/tools/testing/selftests/mm/transhuge-stress.c
@@ -25,13 +25,14 @@ int main(int argc, char **argv)
{
size_t ram, len;
void *ptr, *p;
- struct timespec a, b;
+ struct timespec start, a, b;
int i = 0;
char *name = NULL;
double s;
uint8_t *map;
size_t map_len;
int pagemap_fd;
+ int duration = 0;
ram = sysconf(_SC_PHYS_PAGES);
if (ram > SIZE_MAX / psize() / 4)
@@ -42,9 +43,11 @@ int main(int argc, char **argv)
while (++i < argc) {
if (!strcmp(argv[i], "-h"))
- errx(1, "usage: %s [size in MiB]", argv[0]);
+ errx(1, "usage: %s [-f <filename>] [-d <duration>] [size in MiB]", argv[0]);
else if (!strcmp(argv[i], "-f"))
name = argv[++i];
+ else if (!strcmp(argv[i], "-d"))
+ duration = atoi(argv[++i]);
else
len = atoll(argv[i]) << 20;
}
@@ -78,6 +81,8 @@ int main(int argc, char **argv)
if (!map)
errx(2, "map malloc");
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
while (1) {
int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
@@ -118,5 +123,8 @@ int main(int argc, char **argv)
"%4d succeed, %4d failed, %4d different pages",
s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
nr_succeed, nr_failed, nr_pages);
+
+ if (duration > 0 && b.tv_sec - start.tv_sec >= duration)
+ return 0;
}
}
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index ba20d750402261..02b89860e193d8 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -499,6 +499,9 @@ void *uffd_poll_thread(void *arg)
int ret;
char tmp_chr;
+ if (!args->handle_fault)
+ args->handle_fault = uffd_handle_page_fault;
+
pollfd[0].fd = uffd;
pollfd[0].events = POLLIN;
pollfd[1].fd = pipefd[cpu*2];
@@ -527,7 +530,7 @@ void *uffd_poll_thread(void *arg)
err("unexpected msg event %u\n", msg.event);
break;
case UFFD_EVENT_PAGEFAULT:
- uffd_handle_page_fault(&msg, args);
+ args->handle_fault(&msg, args);
break;
case UFFD_EVENT_FORK:
close(uffd);
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index 197f5262fe0d17..7c4fa964c3b088 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -77,6 +77,9 @@ struct uffd_args {
unsigned long missing_faults;
unsigned long wp_faults;
unsigned long minor_faults;
+
+ /* A custom fault handler; defaults to uffd_handle_page_fault. */
+ void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args);
};
struct uffd_test_ops {
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index 995ff13e74c780..469e0476af26bb 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -53,21 +53,21 @@ pthread_attr_t attr;
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
const char *examples =
- "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
- "./userfaultfd anon 100 99999\n\n"
- "# Run share memory test on 1GiB region with 99 bounces:\n"
- "./userfaultfd shmem 1000 99\n\n"
- "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
- "./userfaultfd hugetlb 256 50\n\n"
- "# Run the same hugetlb test but using private file:\n"
- "./userfaultfd hugetlb-private 256 50\n\n"
- "# 10MiB-~6GiB 999 bounces anonymous test, "
- "continue forever unless an error triggers\n"
- "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+ "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+ "./uffd-stress anon 100 99999\n\n"
+ "# Run share memory test on 1GiB region with 99 bounces:\n"
+ "./uffd-stress shmem 1000 99\n\n"
+ "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
+ "./uffd-stress hugetlb 256 50\n\n"
+ "# Run the same hugetlb test but using private file:\n"
+ "./uffd-stress hugetlb-private 256 50\n\n"
+ "# 10MiB-~6GiB 999 bounces anonymous test, "
+ "continue forever unless an error triggers\n"
+ "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
static void usage(void)
{
- fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces>\n\n");
+ fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n");
fprintf(stderr, "Supported <test type>: anon, hugetlb, "
"hugetlb-private, shmem, shmem-private\n\n");
fprintf(stderr, "Examples:\n\n");
@@ -189,10 +189,8 @@ static int stress(struct uffd_args *args)
locking_thread, (void *)cpu))
return 1;
if (bounces & BOUNCE_POLL) {
- if (pthread_create(&uffd_threads[cpu], &attr,
- uffd_poll_thread,
- (void *)&args[cpu]))
- return 1;
+ if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu]))
+ err("uffd_poll_thread create");
} else {
if (pthread_create(&uffd_threads[cpu], &attr,
uffd_read_thread,
@@ -250,6 +248,8 @@ static int userfaultfd_stress(void)
struct uffd_args args[nr_cpus];
uint64_t mem_size = nr_pages * page_size;
+ memset(args, 0, sizeof(struct uffd_args) * nr_cpus);
+
if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL))
err("context init failed");
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 04d91f144d1cba..2709a34a39c52d 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -951,6 +951,117 @@ static void uffd_zeropage_test(uffd_test_args_t *args)
uffd_test_pass();
}
+static void uffd_register_poison(int uffd, void *addr, uint64_t len)
+{
+ uint64_t ioctls = 0;
+ uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
+
+ if (uffd_register_with_ioctls(uffd, addr, len, true,
+ false, false, &ioctls))
+ err("poison register fail");
+
+ if ((ioctls & expected) != expected)
+ err("registered area doesn't support COPY and POISON ioctls");
+}
+
+static void do_uffdio_poison(int uffd, unsigned long offset)
+{
+ struct uffdio_poison uffdio_poison = { 0 };
+ int ret;
+ __s64 res;
+
+ uffdio_poison.range.start = (unsigned long) area_dst + offset;
+ uffdio_poison.range.len = page_size;
+ uffdio_poison.mode = 0;
+ ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison);
+ res = uffdio_poison.updated;
+
+ if (ret)
+ err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
+ else if (res != page_size)
+ err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
+}
+
+static void uffd_poison_handle_fault(
+ struct uffd_msg *msg, struct uffd_args *args)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+
+ /* Odd pages -> copy zeroed page; even pages -> poison. */
+ if (offset & page_size)
+ copy_page(uffd, offset, false);
+ else
+ do_uffdio_poison(uffd, offset);
+}
+
+static void uffd_poison_test(uffd_test_args_t *targs)
+{
+ pthread_t uffd_mon;
+ char c;
+ struct uffd_args args = { 0 };
+ struct sigaction act = { 0 };
+ unsigned long nr_sigbus = 0;
+ unsigned long nr;
+
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffd_register_poison(uffd, area_dst, nr_pages * page_size);
+ memset(area_src, 0, nr_pages * page_size);
+
+ args.handle_fault = uffd_poison_handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ sigbuf = &jbuf;
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
+
+ for (nr = 0; nr < nr_pages; ++nr) {
+ unsigned long offset = nr * page_size;
+ const char *bytes = (const char *) area_dst + offset;
+ const char *i;
+
+ if (sigsetjmp(*sigbuf, 1)) {
+ /*
+ * Access below triggered a SIGBUS, which was caught by
+ * sighndl, which then jumped here. Count this SIGBUS,
+ * and move on to next page.
+ */
+ ++nr_sigbus;
+ continue;
+ }
+
+ for (i = bytes; i < bytes + page_size; ++i) {
+ if (*i)
+ err("nonzero byte in area_dst (%p) at %p: %u",
+ area_dst, i, *i);
+ }
+ }
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("pthread_join()");
+
+ if (nr_sigbus != nr_pages / 2)
+ err("expected to receive %lu SIGBUS, actually received %lu",
+ nr_pages / 2, nr_sigbus);
+
+ uffd_test_pass();
+}
+
/*
* Test the returned uffdio_register.ioctls with different register modes.
* Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
@@ -1126,6 +1237,12 @@ uffd_test_case_t uffd_tests[] = {
UFFD_FEATURE_PAGEFAULT_FLAG_WP |
UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
},
+ {
+ .name = "poison",
+ .uffd_fn = uffd_poison_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_POISON,
+ },
};
static void usage(const char *prog)
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index 7cfaf4a74c571b..cfbc501290d3b6 100644
--- a/tools/testing/selftests/mm/va_high_addr_switch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -292,7 +292,7 @@ static int supported_arch(void)
#elif defined(__x86_64__)
return 1;
#elif defined(__aarch64__)
- return 1;
+ return getpagesize() == PAGE_SIZE;
#else
return 0;
#endif
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
index 7588428b8fcd7d..e1052443d070ed 100644
--- a/tools/testing/selftests/proc/proc-empty-vm.c
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -77,7 +77,7 @@ static const char proc_pid_smaps_vsyscall_1[] =
"Swap: 0 kB\n"
"SwapPss: 0 kB\n"
"Locked: 0 kB\n"
-"THPeligible: 0\n"
+"THPeligible: 0\n"
/*
* "ProtectionKey:" field is conditional. It is possible to check it as well,
* but I don't have such machine.
@@ -107,7 +107,7 @@ static const char proc_pid_smaps_vsyscall_2[] =
"Swap: 0 kB\n"
"SwapPss: 0 kB\n"
"Locked: 0 kB\n"
-"THPeligible: 0\n"
+"THPeligible: 0\n"
/*
* "ProtectionKey:" field is conditional. It is possible to check it as well,
* but I'm too tired.