aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mel Gorman <mgorman@techsingularity.net> 2023-08-17 15:45:03 +0100
committer Mel Gorman <mgorman@techsingularity.net> 2023-09-01 11:44:50 +0100
commit 0121550bb7ffeb2e903a92b0c2d5d218862686c2 (patch)
tree 068f905dc38bd7b1b668fd2e12b20fdf16bb1b61
parent e4147a0369cec4c7e96e81d46e1fef134bedd57b (diff)
download linux-sched-numabselective-v1r5.tar.gz
sched: numab: Complete scanning of VMAs only when there is no alternative [sched-numabselective-v1r5]
-rw-r--r-- include/linux/mm_types.h 6
-rw-r--r-- include/linux/sched/numa_balancing.h 1
-rw-r--r-- include/trace/events/sched.h 3
-rw-r--r-- kernel/sched/fair.c 48
4 files changed, 54 insertions, 4 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 57cafb195eb87e..9ad1f567911388 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -503,6 +503,12 @@ struct vma_numab_state {
* VMA_PID_RESET_PERIOD
* jiffies.
*/
+ int prev_scan_seq; /* MM scan sequence ID when
+ * the VMA was last completely
+ * scanned. A VMA is not
+ * eligible for scanning if
+ * prev_scan_seq == numa_scan_seq
+ */
};
/*
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 96ebc5e28a5c16..b0b1b0508f8755 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -22,6 +22,7 @@ enum numa_vmaskip_reason {
NUMAB_SKIP_SCAN_DELAY,
NUMAB_SKIP_PID_INACTIVE,
NUMAB_SKIP_PID_IGNORED,
+ NUMAB_SKIP_SEQ_COMPLETED,
};
#ifdef CONFIG_NUMA_BALANCING
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c2ad8bd2293d93..155ba27a244469 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -671,7 +671,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,
EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \
EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \
EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \
- EMe(NUMAB_SKIP_PID_IGNORED, "pid_ignored" )
+ EM( NUMAB_SKIP_PID_IGNORED, "pid_ignored" ) \
+ EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" )
/* Redefine for export. */
#undef EM
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3eb5914a51c38c..c35d35b385fcaf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2973,6 +2973,8 @@ static void task_numa_work(struct callback_head *work)
unsigned long nr_pte_updates = 0;
long pages, virtpages;
struct vma_iterator vmi;
+ bool vma_pids_skipped;
+ bool vma_pids_forced = false;
SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
@@ -3015,7 +3017,6 @@ static void task_numa_work(struct callback_head *work)
*/
p->node_stamp += 2 * TICK_NSEC;
- start = mm->numa_scan_offset;
pages = sysctl_numa_balancing_scan_size;
pages <<= 20 - PAGE_SHIFT; /* MB in pages */
virtpages = pages * 8; /* Scan up to this much virtual space */
@@ -3025,6 +3026,16 @@ static void task_numa_work(struct callback_head *work)
if (!mmap_read_trylock(mm))
return;
+
+ /*
+ * VMAs are skipped if the current PID has not trapped a fault within
+ * the VMA recently. Allow scanning to be forced if there is no
+ * suitable VMA remaining.
+ */
+ vma_pids_skipped = false;
+
+retry_pids:
+ start = mm->numa_scan_offset;
vma_iter_init(&vmi, mm, start);
vma = vma_next(&vmi);
if (!vma) {
@@ -3087,12 +3098,23 @@ static void task_numa_work(struct callback_head *work)
continue;
}
- /* Do not scan the VMA if task has not accessed */
- if (!vma_is_accessed(mm, vma)) {
+ /*
+ * Do not scan the VMA if task has not accessed it, unless no other
+ * VMA candidate exists.
+ */
+ if (!vma_pids_forced && !vma_is_accessed(mm, vma)) {
+ vma_pids_skipped = true;
trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE);
continue;
}
+ /* Do not rescan VMAs twice within the same sequence. */
+ if (vma->numab_state->prev_scan_seq == mm->numa_scan_seq) {
+ mm->numa_scan_offset = vma->vm_end;
+ trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SEQ_COMPLETED);
+ continue;
+ }
+
/*
* RESET access PIDs regularly for old VMAs. Resetting after checking
* vma for recent access to avoid clearing PID info before access..
@@ -3129,8 +3151,28 @@ static void task_numa_work(struct callback_head *work)
cond_resched();
} while (end != vma->vm_end);
+
+ /* VMA scan is complete, do not scan until next sequence. */
+ vma->numab_state->prev_scan_seq = mm->numa_scan_seq;
+
+ /*
+ * Only force scan within one VMA at a time to limit the
+ * cost of scanning a potentially uninteresting VMA.
+ */
+ if (vma_pids_forced)
+ break;
} for_each_vma(vmi, vma);
+ /*
+ * If no VMAs are remaining and VMAs were skipped due to the PID
+ * not accessing the VMA previously then force a scan to ensure
+ * forward progress.
+ */
+ if (!vma && !vma_pids_forced && vma_pids_skipped) {
+ vma_pids_forced = true;
+ goto retry_pids;
+ }
+
out:
/*
* It is possible to reach the end of the VMA list but the last few