Patch from Robert Love And here goes my current scheduler bits. Pretty minimal: - tuning knob for NUMA's node_threshold - better defaults for interactivity: o max_timeslice: 300ms -> 200ms o default timeslice: 150ms -> 100ms o child_penalty: 95 -> 50 o max_sleep_avg: 2s -> 5s o starvation_limit: 2s -> 3s - bring back the smart wake up from Ingo's old tree - rq_idx() in UP should typecast to long - cleanup Patch is on top of 2.5.59-mm8 with the previous Ingo scheduler patch added and the old patches removed. linux/sysctl.h | 1 + sched.c | 39 ++++++++++++++++++++++++++++----------- sysctl.c | 4 ++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff -puN include/linux/sysctl.h~rml-scheduler-update2 include/linux/sysctl.h --- 25/include/linux/sysctl.h~rml-scheduler-update2 2003-02-07 00:58:35.000000000 -0800 +++ 25-akpm/include/linux/sysctl.h 2003-02-07 00:58:35.000000000 -0800 @@ -169,6 +169,7 @@ enum { SCHED_INTERACTIVE_DELTA=7, /* delta used to scale interactivity */ SCHED_MAX_SLEEP_AVG=8, /* maximum sleep avg attainable */ SCHED_STARVATION_LIMIT=9, /* no re-active if expired is starved */ + SCHED_NODE_THRESHOLD=10, /* NUMA node rebalance threshold */ }; /* CTL_NET names: */ diff -puN kernel/sched.c~rml-scheduler-update2 kernel/sched.c --- 25/kernel/sched.c~rml-scheduler-update2 2003-02-07 00:58:35.000000000 -0800 +++ 25-akpm/kernel/sched.c 2003-02-07 00:59:12.000000000 -0800 @@ -54,22 +54,23 @@ /* * These are the 'tuning knobs' of the scheduler: * - * Minimum timeslice is 10 msecs, default timeslice is 150 msecs, - * maximum timeslice is 300 msecs. Timeslices get refilled after + * Minimum timeslice is 10 msecs, default timeslice is 100 msecs, + * maximum timeslice is 200 msecs. Timeslices get refilled after * they expire. 
* * They are configurable via /proc/sys/sched */ int min_timeslice = (10 * HZ) / 1000; -int max_timeslice = (300 * HZ) / 1000; -int child_penalty = 95; +int max_timeslice = (200 * HZ) / 1000; +int child_penalty = 50; int parent_penalty = 100; int exit_weight = 3; int prio_bonus_ratio = 25; int interactive_delta = 2; -int max_sleep_avg = 2 * HZ; -int starvation_limit = 2 * HZ; +int max_sleep_avg = 5 * HZ; +int starvation_limit = 3 * HZ; +int node_threshold = 125; #define MIN_TIMESLICE (min_timeslice) #define MAX_TIMESLICE (max_timeslice) @@ -81,7 +82,9 @@ int starvation_limit = 2 * HZ; #define MAX_SLEEP_AVG (max_sleep_avg) #define STARVATION_LIMIT (starvation_limit) #define AGRESSIVE_IDLE_STEAL 1 -#define NODE_THRESHOLD 125 +#define NODE_THRESHOLD (node_threshold) +#define SYNC_WAKEUPS 1 +#define SMART_WAKE_CHILD 1 /* * If a task is 'interactive' then we reinsert it in the active @@ -179,7 +182,7 @@ struct prio_array { # define cpu_active_balance(c) (cpu_rq(c)->cpu[0].active_balance) #else # define MAX_NR_SIBLINGS 1 -# define rq_idx(cpu) (cpu) +# define rq_idx(cpu) ((long) cpu) # define cpu_idx(cpu) 0 # define for_each_sibling(idx, rq) while (0) # define cpu_active_balance(c) 0 @@ -573,6 +576,8 @@ static int try_to_wake_up(task_t * p, in long old_state; runqueue_t *rq; + sync &= SYNC_WAKEUPS; + repeat_lock_task: rq = task_rq_lock(p, &flags); old_state = p->state; @@ -632,7 +637,19 @@ void wake_up_forked_process(task_t * p) p->prio = effective_prio(p); } set_task_cpu(p, smp_processor_id()); - activate_task(p, rq); + + if (SMART_WAKE_CHILD) { + if (unlikely(!current->array)) + __activate_task(p, rq); + else { + p->prio = current->prio; + list_add_tail(&p->run_list, &current->run_list); + p->array = current->array; + p->array->nr_active++; + nr_running_inc(rq); + } + } else + activate_task(p, rq); rq_unlock(rq); } @@ -2060,7 +2077,7 @@ out_unlock: * @user_mask_ptr: user-space pointer to the new cpu mask */ asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len, - 
unsigned long *user_mask_ptr) + unsigned long *user_mask_ptr) { unsigned long new_mask; int retval; @@ -2112,7 +2129,7 @@ out_unlock: * @user_mask_ptr: user-space pointer to hold the current cpu mask */ asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr) + unsigned long *user_mask_ptr) { unsigned int real_len; unsigned long mask; diff -puN kernel/sysctl.c~rml-scheduler-update2 kernel/sysctl.c --- 25/kernel/sysctl.c~rml-scheduler-update2 2003-02-07 00:58:35.000000000 -0800 +++ 25-akpm/kernel/sysctl.c 2003-02-07 00:58:35.000000000 -0800 @@ -64,6 +64,7 @@ extern int prio_bonus_ratio; extern int interactive_delta; extern int max_sleep_avg; extern int starvation_limit; +extern int node_threshold; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -399,6 +400,9 @@ static ctl_table sched_table[] = { {SCHED_STARVATION_LIMIT, "starvation_limit", &starvation_limit, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_NODE_THRESHOLD, "node_threshold", &node_threshold, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, {0} }; _