From: Nick Piggin

This patch starts to balance woken processes when half the relevant
domain's imbalance_pct is reached.  Previously balancing would start
after a small, constant difference in waker/wakee runqueue loads was
reached, which would cause too much process movement when there are
lots of processes running.

It also turns wake balancing into a domain flag while previously it was
always on.  Now sched domains can "soft partition" an SMP system
without using processor affinities.

---

 25-akpm/include/linux/sched.h |    9 ++++++---
 25-akpm/kernel/sched.c        |   35 +++++++++++++++++++----------------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff -puN include/linux/sched.h~sched-damp-passive-balance include/linux/sched.h
--- 25/include/linux/sched.h~sched-damp-passive-balance	Mon May  3 15:04:10 2004
+++ 25-akpm/include/linux/sched.h	Mon May  3 15:04:10 2004
@@ -556,7 +556,8 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define SD_BALANCE_EXEC		2	/* Balance on exec */
 #define SD_WAKE_IDLE		4	/* Wake to idle CPU on task wakeup */
 #define SD_WAKE_AFFINE		8	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	16	/* Domain members share cpu power */
+#define SD_WAKE_BALANCE		16	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	32	/* Domain members share cpu power */
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -625,7 +626,8 @@ struct sched_domain {
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
 	.flags			= SD_BALANCE_NEWIDLE	\
-				| SD_WAKE_AFFINE,	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
@@ -644,7 +646,8 @@ struct sched_domain {
 	.cache_hot_time		= (10*1000000),		\
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
-	.flags			= SD_BALANCE_EXEC,	\
+	.flags			= SD_BALANCE_EXEC	\
+				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
diff -puN kernel/sched.c~sched-damp-passive-balance kernel/sched.c
--- 25/kernel/sched.c~sched-damp-passive-balance	Mon May  3 15:04:10 2004
+++ 25-akpm/kernel/sched.c	Mon May  3 15:04:10 2004
@@ -761,25 +761,28 @@ static int try_to_wake_up(task_t * p, un
 	new_cpu = this_cpu; /* Wake to this CPU if we can */
 
 	/*
-	 * Passive load balancing. If the queues are very out of balance
-	 * we might as well balance here rather than the periodic load
-	 * balancing.
-	 */
-	if (load > this_load + SCHED_LOAD_SCALE*2)
-		goto out_set_cpu;
-
-	/*
-	 * Migrate the task to the waking domain.
-	 * Do not violate hard affinity.
+	 * Scan domains for affine wakeup and passive balancing
+	 * possibilities.
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (!(sd->flags & SD_WAKE_AFFINE))
-			break;
-		if (task_hot(p, rq->timestamp_last_tick, sd))
-			break;
+		unsigned int imbalance;
+		/*
+		 * Start passive balancing when half the imbalance_pct
+		 * limit is reached.
+		 */
+		imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2;
 
-		if (cpu_isset(cpu, sd->span))
-			goto out_set_cpu;
+		if ( ((sd->flags & SD_WAKE_AFFINE) &&
+				!task_hot(p, rq->timestamp_last_tick, sd))
+			|| ((sd->flags & SD_WAKE_BALANCE) &&
+				imbalance*this_load <= 100*load) ) {
+			/*
+			 * Now sd has SD_WAKE_AFFINE and p is cache cold in sd
+			 * or sd has SD_WAKE_BALANCE and there is an imbalance
+			 */
+			if (cpu_isset(cpu, sd->span))
+				goto out_set_cpu;
+		}
 	}
 
 	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
_
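
For anyone who wants to poke at the new threshold arithmetic outside
the kernel tree, here is a minimal userspace sketch of the passive
half of the wakeup decision.  The struct, the function name, and the
imbalance_pct value of 125 are illustrative stand-ins, and the affine
(cache-hot) half of the test is left out; only the imbalance
computation and the imbalance*this_load <= 100*load comparison come
from the patch itself.

	/*
	 * Userspace sketch of the SD_WAKE_BALANCE decision above.
	 * Illustration only; not kernel code.
	 */
	#include <stdio.h>

	struct sched_domain_sketch {
		unsigned int imbalance_pct;	/* e.g. 125 => 25% limit */
		int wake_balance;		/* stands in for SD_WAKE_BALANCE */
	};

	/* Return 1 if the wakee should be pulled to the waking CPU. */
	static int passive_balance(const struct sched_domain_sketch *sd,
				   unsigned long this_load, unsigned long load)
	{
		unsigned int imbalance;

		/*
		 * Same computation as the patch: imbalance_pct plus half
		 * the amount by which it exceeds 100.
		 */
		imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2;

		return sd->wake_balance && imbalance * this_load <= 100 * load;
	}

	int main(void)
	{
		struct sched_domain_sketch sd = {
			.imbalance_pct = 125,	/* assumed, for illustration */
			.wake_balance = 1,
		};

		/* Loads are in the scheduler's fixed-point load units. */
		printf("load 130 vs 100: %s\n",
		       passive_balance(&sd, 100, 130) ? "pull" : "leave");
		printf("load 140 vs 100: %s\n",
		       passive_balance(&sd, 100, 140) ? "pull" : "leave");
		return 0;
	}

With imbalance_pct == 125 the computed threshold is 137, so a wakeup
only migrates the task when its old runqueue carries at least 1.37x
the waking CPU's load: the first call prints "leave" (13700 > 13000)
and the second prints "pull" (13700 <= 14000).  Because the threshold
scales with the relative load rather than a fixed SCHED_LOAD_SCALE*2
gap, heavily loaded machines no longer trip the old constant-difference
test on every wakeup.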