diff -purN -X /home/mbligh/.diff.exclude 230-objrmap/kernel/sched.c 240-local_balance_exec/kernel/sched.c --- 230-objrmap/kernel/sched.c 2003-12-11 17:16:03.000000000 -0800 +++ 240-local_balance_exec/kernel/sched.c 2003-12-11 17:16:09.000000000 -0800 @@ -946,36 +946,72 @@ static void sched_migrate_task(task_t *p */ static int sched_best_cpu(struct task_struct *p) { - int i, minload, load, best_cpu, node = 0; + int cpu, node, minload, load, best_cpu, best_node; + int this_cpu, this_node, this_node_load; cpumask_t cpumask; - best_cpu = task_cpu(p); - if (cpu_rq(best_cpu)->nr_running <= 2) - return best_cpu; + this_cpu = best_cpu = task_cpu(p); + if (cpu_rq(this_cpu)->nr_running <= 2) + return this_cpu; + this_node = best_node = cpu_to_node(this_cpu); + + /* + * First look for any node-local idle queue and use that. + * This improves performance under light loads (mbligh). + * In case this node turns out to be the lightest node, store the best + * cpu that we find, so we don't go sniffing the same runqueues again. + */ + minload = 10000000; + cpumask = node_to_cpumask(this_node); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (!cpu_isset(cpu, cpumask)) + continue; + load = cpu_rq(cpu)->nr_running; + if (load == 0) + return cpu; + if (load < minload) { + minload = load; + best_cpu = cpu; + } + } + /* + * Now find the lightest loaded node, and put it in best_node + * + * Node load is always divided by nr_cpus_node to normalise load + * values in case cpu count differs from node to node. We first + * multiply node_nr_running by 16 to get a little better resolution. + */ minload = 10000000; - for_each_node_with_cpus(i) { - /* - * Node load is always divided by nr_cpus_node to normalise - * load values in case cpu count differs from node to node. - * We first multiply node_nr_running by 10 to get a little - * better resolution. - */ - load = 10 * atomic_read(&node_nr_running[i]) / nr_cpus_node(i); + this_node_load = 16 * atomic_read(&node_nr_running[this_node]) + / nr_cpus_node(this_node); + for_each_node_with_cpus(node) { + if (node == this_node) + load = this_node_load; + else + load = 16 * atomic_read(&node_nr_running[node]) + / nr_cpus_node(node); if (load < minload) { minload = load; - node = i; + best_node = node; } } + /* If we chose this node, we already did the legwork earlier */ + if (best_node == this_node) + return best_cpu; + + /* Now find the lightest loaded cpu on best_node, and use that */ minload = 10000000; - cpumask = node_to_cpumask(node); - for (i = 0; i < NR_CPUS; ++i) { - if (!cpu_isset(i, cpumask)) + best_cpu = this_cpu; + cpumask = node_to_cpumask(best_node); + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (!cpu_isset(cpu, cpumask)) continue; - if (cpu_rq(i)->nr_running < minload) { - best_cpu = i; - minload = cpu_rq(i)->nr_running; + load = cpu_rq(cpu)->nr_running; + if (load < minload) { + minload = load; + best_cpu = cpu; } } return best_cpu;