Patch from: Robert Love Here are my latest scheduler bits. The main goal is saner interactive performance. - better defaults for interactivity: o max_timeslice: 300ms -> 200ms o default timeslice: 150ms -> 100ms o child_penalty: 95 -> 50 o max_sleep_avg: 2s -> 5s o starvation_limit: 2s -> 5s - tuning knob for NUMA's node_threshold - rq_idx() in UP should typecast to long - cleanup I removed the smart wake up changes, which are questionable and may have been the root of any regressions. linux/sysctl.h | 1 + sched.c | 21 +++++++++++---------- sysctl.c | 4 ++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff -puN include/linux/sysctl.h~rml-scheduler-bits include/linux/sysctl.h --- 25/include/linux/sysctl.h~rml-scheduler-bits 2003-02-11 23:59:26.000000000 -0800 +++ 25-akpm/include/linux/sysctl.h 2003-02-11 23:59:26.000000000 -0800 @@ -169,6 +169,7 @@ enum { SCHED_INTERACTIVE_DELTA=7, /* delta used to scale interactivity */ SCHED_MAX_SLEEP_AVG=8, /* maximum sleep avg attainable */ SCHED_STARVATION_LIMIT=9, /* no re-active if expired is starved */ + SCHED_NODE_THRESHOLD=10, /* NUMA node rebalance threshold */ }; /* CTL_NET names: */ diff -puN kernel/sched.c~rml-scheduler-bits kernel/sched.c --- 25/kernel/sched.c~rml-scheduler-bits 2003-02-11 23:59:26.000000000 -0800 +++ 25-akpm/kernel/sched.c 2003-02-11 23:59:26.000000000 -0800 @@ -54,22 +54,23 @@ /* * These are the 'tuning knobs' of the scheduler: * - * Minimum timeslice is 10 msecs, default timeslice is 150 msecs, - * maximum timeslice is 300 msecs. Timeslices get refilled after + * Minimum timeslice is 10 msecs, default timeslice is 100 msecs, + * maximum timeslice is 200 msecs. Timeslices get refilled after * they expire. 
* * They are configurable via /proc/sys/sched */ int min_timeslice = (10 * HZ) / 1000; -int max_timeslice = (300 * HZ) / 1000; -int child_penalty = 95; +int max_timeslice = (200 * HZ) / 1000; +int child_penalty = 50; int parent_penalty = 100; int exit_weight = 3; int prio_bonus_ratio = 25; int interactive_delta = 2; -int max_sleep_avg = 2 * HZ; -int starvation_limit = 2 * HZ; +int max_sleep_avg = 5 * HZ; +int starvation_limit = 5 * HZ; +int node_threshold = 125; #define MIN_TIMESLICE (min_timeslice) #define MAX_TIMESLICE (max_timeslice) @@ -80,8 +81,8 @@ int starvation_limit = 2 * HZ; #define INTERACTIVE_DELTA (interactive_delta) #define MAX_SLEEP_AVG (max_sleep_avg) #define STARVATION_LIMIT (starvation_limit) +#define NODE_THRESHOLD (node_threshold) #define AGRESSIVE_IDLE_STEAL 1 -#define NODE_THRESHOLD 125 /* * If a task is 'interactive' then we reinsert it in the active @@ -179,7 +180,7 @@ struct prio_array { # define cpu_active_balance(c) (cpu_rq(c)->cpu[0].active_balance) #else # define MAX_NR_SIBLINGS 1 -# define rq_idx(cpu) (cpu) +# define rq_idx(cpu) ((long) cpu) # define cpu_idx(cpu) 0 # define for_each_sibling(idx, rq) while (0) # define cpu_active_balance(c) 0 @@ -2069,7 +2070,7 @@ out_unlock: * @user_mask_ptr: user-space pointer to the new cpu mask */ asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr) + unsigned long *user_mask_ptr) { unsigned long new_mask; int retval; @@ -2121,7 +2122,7 @@ out_unlock: * @user_mask_ptr: user-space pointer to hold the current cpu mask */ asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long *user_mask_ptr) + unsigned long *user_mask_ptr) { unsigned int real_len; unsigned long mask; diff -puN kernel/sysctl.c~rml-scheduler-bits kernel/sysctl.c --- 25/kernel/sysctl.c~rml-scheduler-bits 2003-02-11 23:59:26.000000000 -0800 +++ 25-akpm/kernel/sysctl.c 2003-02-11 23:59:26.000000000 -0800 @@ -64,6 +64,7 @@ extern int prio_bonus_ratio; extern int 
interactive_delta; extern int max_sleep_avg; extern int starvation_limit; +extern int node_threshold; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -399,6 +400,9 @@ static ctl_table sched_table[] = { {SCHED_STARVATION_LIMIT, "starvation_limit", &starvation_limit, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &zero, NULL}, + {SCHED_NODE_THRESHOLD, "node_threshold", &node_threshold, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, &one, NULL}, {0} }; _