diff options
author | Mel Gorman <mgorman@techsingularity.net> | 2023-08-17 15:45:03 +0100 |
---|---|---|
committer | Mel Gorman <mgorman@techsingularity.net> | 2023-09-01 11:44:50 +0100 |
commit | 0121550bb7ffeb2e903a92b0c2d5d218862686c2 (patch) | |
tree | 068f905dc38bd7b1b668fd2e12b20fdf16bb1b61 | |
parent | e4147a0369cec4c7e96e81d46e1fef134bedd57b (diff) | |
download | linux-sched-numabselective-v1r5.tar.gz |
sched: numab: Complete scanning of VMAs only when there is no alternative (sched-numabselective-v1r5)
-rw-r--r-- | include/linux/mm_types.h | 6 | ||||
-rw-r--r-- | include/linux/sched/numa_balancing.h | 1 | ||||
-rw-r--r-- | include/trace/events/sched.h | 3 | ||||
-rw-r--r-- | kernel/sched/fair.c | 48 |
4 files changed, 54 insertions, 4 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 57cafb195eb87e..9ad1f567911388 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -503,6 +503,12 @@ struct vma_numab_state {
 					 * VMA_PID_RESET_PERIOD
 					 * jiffies.
 					 */
+	int prev_scan_seq;		/* MM scan sequence ID when
+					 * the VMA was last completely
+					 * scanned. A VMA is not
+					 * eligible for scanning if
+					 * prev_scan_seq == numa_scan_seq
+					 */
 };
 
 /*
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 96ebc5e28a5c16..b0b1b0508f8755 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -22,6 +22,7 @@ enum numa_vmaskip_reason {
 	NUMAB_SKIP_SCAN_DELAY,
 	NUMAB_SKIP_PID_INACTIVE,
 	NUMAB_SKIP_PID_IGNORED,
+	NUMAB_SKIP_SEQ_COMPLETED,
 };
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c2ad8bd2293d93..155ba27a244469 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -671,7 +671,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,
 	EM( NUMAB_SKIP_INACCESSIBLE,		"inaccessible" )	\
 	EM( NUMAB_SKIP_SCAN_DELAY,		"scan_delay" )		\
 	EM( NUMAB_SKIP_PID_INACTIVE,		"pid_inactive" )	\
-	EMe(NUMAB_SKIP_PID_IGNORED,		"pid_ignored" )
+	EM( NUMAB_SKIP_PID_IGNORED,		"pid_ignored" )		\
+	EMe(NUMAB_SKIP_SEQ_COMPLETED,		"seq_completed" )
 
 /* Redefine for export. */
 #undef EM
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3eb5914a51c38c..c35d35b385fcaf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2973,6 +2973,8 @@ static void task_numa_work(struct callback_head *work)
 	unsigned long nr_pte_updates = 0;
 	long pages, virtpages;
 	struct vma_iterator vmi;
+	bool vma_pids_skipped;
+	bool vma_pids_forced = false;
 
 	SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
 
@@ -3015,7 +3017,6 @@ static void task_numa_work(struct callback_head *work)
 	 */
 	p->node_stamp += 2 * TICK_NSEC;
 
-	start = mm->numa_scan_offset;
 	pages = sysctl_numa_balancing_scan_size;
 	pages <<= 20 - PAGE_SHIFT; /* MB in pages */
 	virtpages = pages * 8;	   /* Scan up to this much virtual space */
@@ -3025,6 +3026,16 @@ static void task_numa_work(struct callback_head *work)
 
 	if (!mmap_read_trylock(mm))
 		return;
+
+	/*
+	 * VMAs are skipped if the current PID has not trapped a fault within
+	 * the VMA recently. Allow scanning to be forced if there is no
+	 * suitable VMA remaining.
+	 */
+	vma_pids_skipped = false;
+
+retry_pids:
+	start = mm->numa_scan_offset;
 	vma_iter_init(&vmi, mm, start);
 	vma = vma_next(&vmi);
 	if (!vma) {
@@ -3087,12 +3098,23 @@ static void task_numa_work(struct callback_head *work)
 			continue;
 		}
 
-		/* Do not scan the VMA if task has not accessed */
-		if (!vma_is_accessed(mm, vma)) {
+		/*
+		 * Do not scan the VMA if task has not accessed unless no other
+		 * VMA candidate exists.
+		 */
+		if (!vma_pids_forced && !vma_is_accessed(mm, vma)) {
+			vma_pids_skipped = true;
 			trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE);
 			continue;
 		}
 
+		/* Do not rescan VMAs twice within the same sequence. */
+		if (vma->numab_state->prev_scan_seq == mm->numa_scan_seq) {
+			mm->numa_scan_offset = vma->vm_end;
+			trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SEQ_COMPLETED);
+			continue;
+		}
+
 		/*
 		 * RESET access PIDs regularly for old VMAs. Resetting after checking
 		 * vma for recent access to avoid clearing PID info before access..
@@ -3129,8 +3151,28 @@ static void task_numa_work(struct callback_head *work)
 
 			cond_resched();
 		} while (end != vma->vm_end);
+
+		/* VMA scan is complete, do not scan until next sequence. */
+		vma->numab_state->prev_scan_seq = mm->numa_scan_seq;
+
+		/*
+		 * Only force scan within one VMA at a time to limit the
+		 * cost of scanning a potentially uninteresting VMA.
+		 */
+		if (vma_pids_forced)
+			break;
 	} for_each_vma(vmi, vma);
 
+	/*
+	 * If no VMAs are remaining and VMAs were skipped due to the PID
+	 * not accessing the VMA previously then force a scan to ensure
+	 * forward progress.
+	 */
+	if (!vma && !vma_pids_forced && vma_pids_skipped) {
+		vma_pids_forced = true;
+		goto retry_pids;
+	}
+
 out:
 	/*
 	 * It is possible to reach the end of the VMA list but the last few |