---

 25-akpm/include/linux/sched.h |    6 
 25-akpm/kernel/sched.c        |  364 ++++++++++++++++++------------------
 2 files changed, 166 insertions(+), 204 deletions(-)

diff -puN include/linux/sched.h~sched-ingo include/linux/sched.h
--- 25/include/linux/sched.h~sched-ingo	2004-03-26 12:26:21.472049240 -0800
+++ 25-akpm/include/linux/sched.h	2004-03-26 12:26:21.476048632 -0800
@@ -601,7 +601,7 @@ struct sched_domain {
         .busy_factor = 64, \
         .imbalance_pct = 125, \
         .cache_hot_time = (5*1000000/2), \
-        .cache_nice_tries = 1, \
+        .cache_nice_tries = 2, \
         .per_cpu_gain = 100, \
         .flags = SD_BALANCE_NEWIDLE \
                 | SD_WAKE_AFFINE, \
@@ -621,7 +621,7 @@ struct sched_domain {
         .busy_factor = 8, \
         .imbalance_pct = 125, \
         .cache_hot_time = (10*1000000), \
-        .cache_nice_tries = 1, \
+        .cache_nice_tries = 3, \
         .per_cpu_gain = 100, \
         .flags = SD_BALANCE_EXEC, \
         .last_balance = jiffies, \
@@ -647,7 +647,7 @@ static inline int set_cpus_allowed(task_
 
 extern unsigned long long sched_clock(void);
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SMP
 extern void sched_balance_exec(void);
 #else
 #define sched_balance_exec() {}
diff -puN kernel/sched.c~sched-ingo kernel/sched.c
--- 25/kernel/sched.c~sched-ingo	2004-03-26 12:26:21.473049088 -0800
+++ 25-akpm/kernel/sched.c	2004-03-26 12:26:21.483047568 -0800
@@ -185,6 +185,8 @@ static unsigned int task_timeslice(task_
         return BASE_TIMESLICE(p);
 }
 
+#define task_hot(p, now, sd) \
+        ((now) - (p)->timestamp < (sd)->cache_hot_time)
 /*
  * These are the runqueue data structures:
  */
@@ -209,14 +211,7 @@ struct prio_array {
 
 struct runqueue {
         spinlock_t lock;
-        /*
-         * nr_running and cpu_load should be in the same cacheline because
-         * remote CPUs use both these fields when doing load calculation.
-         */
         unsigned long nr_running;
-#ifdef CONFIG_SMP
-        unsigned long cpu_load;
-#endif
         unsigned long long nr_switches;
         unsigned long expired_timestamp, nr_uninterruptible;
         unsigned long long timestamp_last_tick;
@@ -318,6 +313,21 @@ static void enqueue_task(struct task_str
         p->array = array;
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Used by the migration code - we pull tasks from the head of the
+ * remote queue so we want these tasks to show up at the head of the
+ * local queue:
+ */
+static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
+{
+        list_add(&p->run_list, array->queue + p->prio);
+        __set_bit(p->prio, array->bitmap);
+        array->nr_active++;
+        p->array = array;
+}
+#endif
+
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
@@ -606,33 +616,22 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * Return a low guess at the load of cpu.
  */
-static inline unsigned long get_low_cpu_load(int cpu)
+static inline unsigned long cpu_load(int cpu)
 {
-        runqueue_t *rq = cpu_rq(cpu);
-        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
-
-        return min(rq->cpu_load, load_now);
-}
-
-static inline unsigned long get_high_cpu_load(int cpu)
-{
-        runqueue_t *rq = cpu_rq(cpu);
-        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
-
-        return max(rq->cpu_load, load_now);
+        return cpu_rq(cpu)->nr_running * SCHED_LOAD_SCALE;
 }
 #endif
 
 /*
- * wake_idle() is useful especially on SMT architectures to wake a
- * task onto an idle sibling if we would otherwise wake it onto a
- * busy sibling.
+ * wake_idle() can be used on SMT architectures to wake a task onto
+ * an idle sibling if 'cpu' is not idle.
  *
- * Returns the CPU we should wake onto.
+ * Returns 'cpu' if 'cpu' is idle or no siblings of 'cpu' are idle,
+ * otherwise returns an idle sibling.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static inline int wake_idle(int cpu, task_t *p)
 {
         cpumask_t tmp;
         struct sched_domain *sd;
@@ -649,7 +648,6 @@ static int wake_idle(int cpu, task_t *p)
         for_each_cpu_mask(i, tmp) {
                 if (!cpu_isset(i, p->cpus_allowed))
                         continue;
-
                 if (idle_cpu(i))
                         return i;
         }
@@ -685,17 +683,16 @@ static int try_to_wake_up(task_t * p, un
         runqueue_t *rq;
         int cpu, this_cpu;
 #ifdef CONFIG_SMP
+        int new_cpu;
         unsigned long long now;
         unsigned long load, this_load;
         struct sched_domain *sd;
-        int new_cpu;
 #endif
 
         rq = task_rq_lock(p, &flags);
         old_state = p->state;
         if (!(old_state & state))
                 goto out;
-
         if (p->array)
                 goto out_running;
 
@@ -703,37 +700,41 @@ static int try_to_wake_up(task_t * p, un
         this_cpu = smp_processor_id();
 
 #ifdef CONFIG_SMP
-        if (unlikely(task_running(rq, p) || cpu_is_offline(this_cpu)))
-                goto out_activate;
-
-        new_cpu = this_cpu; /* Wake to this CPU if we can */
-
-        if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
-                goto out_set_cpu;
+        new_cpu = this_cpu;
+        sd = cpu_sched_domain(this_cpu);
+        now = sched_clock();
 
-        /* Passive load balancing */
-        load = get_low_cpu_load(cpu);
-        this_load = get_high_cpu_load(this_cpu) + SCHED_LOAD_SCALE;
-        if (load > this_load)
+        if (cpu == this_cpu || unlikely(cpu_is_offline(this_cpu)))
                 goto out_set_cpu;
+        if (task_running(rq, p))
+                goto out_activate;
 
-        now = sched_clock();
+        /*
+         * Passive load balancing, migrate the task if:
+         *
+         * - remote load is higher than local load, and
+         * - task is woken up by another task
+         * - or task is woken up from an irq handler and task is cache-cold.
+         */
+        load = cpu_load(cpu);
+        this_load = cpu_load(this_cpu);
+        if (load > this_load && (!in_interrupt() || !task_hot(p, now, sd)))
+                goto out_set_cpu;
 
         /*
          * Migrate the task to the waking domain.
-         * Do not violate hard affinity.
+         * Do not violate soft affinity.
          */
         for_each_domain(this_cpu, sd) {
                 if (!(sd->flags & SD_WAKE_AFFINE))
                         break;
-                if (now - p->timestamp < sd->cache_hot_time)
+                if (task_hot(p, now, sd))
                         break;
-
                 if (cpu_isset(cpu, sd->span))
                         goto out_set_cpu;
         }
 
-        new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
+        new_cpu = cpu;
 out_set_cpu:
         new_cpu = wake_idle(new_cpu, p);
         if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
@@ -748,10 +749,8 @@ repeat_lock_task:
         old_state = p->state;
         if (!(old_state & state))
                 goto out;
-
         if (p->array)
                 goto out_running;
-
         this_cpu = smp_processor_id();
         cpu = task_cpu(p);
 
@@ -769,7 +768,7 @@ out_activate:
         /*
          * Sync wakeups (i.e. those types of wakeups where the waker
          * has indicated that it will leave the CPU in short order)
-         * don't trigger a preemption, if the woken up task will run on
+         * dont trigger a preemption, if the woken up task will run on
         * this cpu. (in this case the 'I will reschedule' promise of
         * the waker guarantees that the freshly woken up task is going
         * to be considered on this CPU.)
@@ -1102,7 +1101,6 @@ enum idle_type
 };
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_NUMA
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
@@ -1112,8 +1110,8 @@ enum idle_type
 static void sched_migrate_task(task_t *p, int dest_cpu)
 {
         migration_req_t req;
-        runqueue_t *rq;
         unsigned long flags;
+        runqueue_t *rq;
 
         lock_cpu_hotplug();
         rq = task_rq_lock(p, &flags);
@@ -1148,19 +1146,19 @@ out:
  */
 static int sched_best_cpu(struct task_struct *p, struct sched_domain *sd)
 {
+        int i = 0, min_load, this_cpu, best_cpu;
         cpumask_t tmp;
-        int i, min_load, this_cpu, best_cpu;
 
         best_cpu = this_cpu = task_cpu(p);
-        min_load = INT_MAX;
+
+        /* subtract the currently running task's load effect: */
+        min_load = cpu_load(i) - SCHED_LOAD_SCALE;
 
         cpus_and(tmp, sd->span, cpu_online_map);
+        cpu_clear(this_cpu, tmp);
+
         for_each_cpu_mask(i, tmp) {
-                unsigned long load;
-                if (i == this_cpu)
-                        load = get_low_cpu_load(i);
-                else
-                        load = get_high_cpu_load(i) + SCHED_LOAD_SCALE;
+                unsigned long load = cpu_load(i);
 
                 if (min_load > load) {
                         best_cpu = i;
@@ -1172,28 +1170,30 @@ static int sched_best_cpu(struct task_st
 
 /*
  * sched_balance_exec(): find the highest-level, exec-balance-capable
- * domain and try to migrate the task to the least loaded CPU.
+ * domain and try to migrate the current task to the least loaded CPU.
  *
  * execve() is a valuable balancing opportunity, because at this point
- * the task has the smallest effective memory and cache footprint.
+ * the task has the smallest effective cache footprint - a completely new
+ * process image is being created, so almost all of the currently existing
+ * cache footprint is irrelevant. So we attempt to balance this task as
+ * broadly as possible, without considering migration costs, which costs
+ * otherwise affect all other types of task migrations.
  */
 void sched_balance_exec(void)
 {
         struct sched_domain *sd, *best_sd = NULL;
-        int new_cpu;
-        int this_cpu = get_cpu();
+        int new_cpu, this_cpu = get_cpu();
 
-        /* Prefer the current CPU if there's only this task running */
+        /* Prefer the current CPU if there's only this task running: */
         if (this_rq()->nr_running <= 1)
                 goto out;
 
-        for_each_domain(this_cpu, sd) {
+        for_each_domain(this_cpu, sd)
                 if (sd->flags & SD_BALANCE_EXEC)
                         best_sd = sd;
-        }
 
         if (best_sd) {
-                new_cpu = sched_best_cpu(current, sd);
+                new_cpu = sched_best_cpu(current, best_sd);
                 if (new_cpu != this_cpu) {
                         put_cpu();
                         sched_migrate_task(current, new_cpu);
@@ -1203,7 +1203,6 @@ void sched_balance_exec(void)
 out:
         put_cpu();
 }
-#endif /* CONFIG_NUMA */
 
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
@@ -1226,13 +1225,13 @@ static void double_lock_balance(runqueue
  */
 static inline
 void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-               runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+                runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
         dequeue_task(p, src_array);
-        this_rq->nr_running--;
+        src_rq->nr_running--;
         set_task_cpu(p, this_cpu);
         this_rq->nr_running++;
-        enqueue_task(p, this_array);
+        enqueue_task_head(p, this_array);
         p->timestamp = sched_clock() -
                                 (src_rq->timestamp_last_tick - p->timestamp);
         /*
@@ -1248,7 +1247,7 @@ void pull_task(runqueue_t *src_rq, prio_
  */
 static inline
 int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
-                     struct sched_domain *sd, enum idle_type idle)
+                        struct sched_domain *sd, enum idle_type idle)
 {
         /*
          * We do not migrate tasks that are:
@@ -1261,15 +1260,19 @@ int can_migrate_task(task_t *p, runqueue
         if (!cpu_isset(this_cpu, p->cpus_allowed))
                 return 0;
 
-        /* Aggressive migration if we've failed balancing */
-        if (idle == NEWLY_IDLE ||
-                        sd->nr_balance_failed < sd->cache_nice_tries) {
-                if ((rq->timestamp_last_tick - p->timestamp)
-                                                < sd->cache_hot_time)
-                        return 0;
-        }
+        if (!task_hot(p, rq->timestamp_last_tick, sd))
+                return 1;
 
-        return 1;
+        /* Aggressive migration if newly idle or we've failed balancing */
+        if (idle == NEWLY_IDLE)
+                return 1;
+        if (idle == IDLE && (sd->flags & SD_BALANCE_NEWIDLE))
+                return 1;
+        if (sd->nr_balance_failed >= sd->cache_nice_tries)
+                return 1;
+
+        /* abort the search: */
+        return -1;
 }
 
 /*
@@ -1280,30 +1283,24 @@ int can_migrate_task(task_t *p, runqueue
  * Called with both runqueues locked.
  */
 static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
-                        unsigned long max_nr_move, struct sched_domain *domain,
-                        enum idle_type idle)
+                        unsigned long max_nr_move, struct sched_domain *sd,
+                        enum idle_type idle)
 {
-        int idx;
-        int pulled = 0;
         prio_array_t *array, *dst_array;
         struct list_head *head, *curr;
+        int ret, idx, pulled = 0;
         task_t *tmp;
 
         if (max_nr_move <= 0 || busiest->nr_running <= 1)
                 goto out;
 
-        /*
-         * We first consider expired tasks. Those will likely not be
-         * executed in the near future, and they are most likely to
-         * be cache-cold, thus switching CPUs has the least effect
-         * on them.
-         */
-        if (busiest->expired->nr_active) {
-                array = busiest->expired;
-                dst_array = this_rq->expired;
-        } else {
+        /* We first consider active tasks. */
+        if (busiest->active->nr_active) {
                 array = busiest->active;
                 dst_array = this_rq->active;
+        } else {
+                array = busiest->expired;
+                dst_array = this_rq->expired;
         }
 
 new_array:
@@ -1315,22 +1312,27 @@ skip_bitmap:
         else
                 idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
         if (idx >= MAX_PRIO) {
-                if (array == busiest->expired && busiest->active->nr_active) {
-                        array = busiest->active;
-                        dst_array = this_rq->active;
+                if (array == busiest->active && busiest->expired->nr_active) {
+                        array = busiest->expired;
+                        dst_array = this_rq->expired;
                         goto new_array;
                 }
                 goto out;
         }
 
         head = array->queue + idx;
-        curr = head->prev;
+        curr = head->next;
 skip_queue:
         tmp = list_entry(curr, task_t, run_list);
 
-        curr = curr->prev;
+        curr = curr->next;
 
-        if (!can_migrate_task(tmp, busiest, this_cpu, domain, idle)) {
+        ret = can_migrate_task(tmp, busiest, this_cpu, sd, idle);
+        if (ret == -1) {
+                idx++;
+                goto skip_bitmap;
+        }
+        if (!ret) {
                 if (curr != head)
                         goto skip_queue;
                 idx++;
@@ -1353,46 +1355,30 @@ out:
 /*
  * find_busiest_group finds and returns the busiest CPU group within the
  * domain. It calculates and returns the number of tasks which should be
- * moved to restore balance via the imbalance parameter.
+ * moved to restore balance, via the imbalance parameter.
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-                   unsigned long *imbalance, enum idle_type idle)
+                        unsigned long *imbalance, enum idle_type idle)
 {
         struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
-        unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+        unsigned long max_load, avg_load, total_load, this_load;
+        unsigned int total_pwr;
 
-        max_load = this_load = total_load = total_pwr = 0;
+        max_load = this_load = total_load = 0;
+        total_pwr = 0;
 
         do {
                 cpumask_t tmp;
-                unsigned long load;
-                int local_group;
-                int i, nr_cpus = 0;
-
-                local_group = cpu_isset(this_cpu, group->cpumask);
+                int i;
 
                 /* Tally up the load of all CPUs in the group */
-                avg_load = 0;
                 cpus_and(tmp, group->cpumask, cpu_online_map);
-                if (cpus_empty(tmp)) {
-                        WARN_ON(1);
-                        goto out_balanced;
-                }
+                WARN_ON(cpus_empty(tmp));
 
-                for_each_cpu_mask(i, tmp) {
-                        /* Bias balancing toward cpus of our domain */
-                        if (local_group) {
-                                load = get_high_cpu_load(i);
-                        } else
-                                load = get_low_cpu_load(i);
-
-                        nr_cpus++;
-                        avg_load += load;
-                }
-
-                if (!nr_cpus)
-                        goto nextgroup;
+                avg_load = 0;
+                for_each_cpu_mask(i, tmp)
+                        avg_load += cpu_load(i);
 
                 total_load += avg_load;
                 total_pwr += group->cpu_power;
@@ -1400,7 +1386,7 @@ find_busiest_group(struct sched_domain *
                 /* Adjust by relative CPU power of the group */
                 avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
 
-                if (local_group) {
+                if (cpu_isset(this_cpu, group->cpumask)) {
                         this_load = avg_load;
                         this = group;
                         goto nextgroup;
@@ -1437,37 +1423,9 @@ nextgroup:
          */
         *imbalance = (min(max_load - avg_load, avg_load - this_load) + 1) / 2;
 
-        if (*imbalance <= SCHED_LOAD_SCALE/2) {
-                unsigned long pwr_now = 0, pwr_move = 0;
-                unsigned long tmp;
-
-                /*
-                 * OK, we don't have enough imbalance to justify moving tasks,
-                 * however we may be able to increase total CPU power used by
-                 * moving them.
-                 */
+        if (*imbalance <= SCHED_LOAD_SCALE/2)
+                goto out_balanced;
 
-                pwr_now += busiest->cpu_power*min(SCHED_LOAD_SCALE, max_load);
-                pwr_now += this->cpu_power*min(SCHED_LOAD_SCALE, this_load);
-                pwr_now /= SCHED_LOAD_SCALE;
-
-                /* Amount of load we'd subtract */
-                tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/busiest->cpu_power;
-                if (max_load > tmp)
-                        pwr_move += busiest->cpu_power*min(SCHED_LOAD_SCALE,
-                                                        max_load - tmp);
-
-                /* Amount of load we'd add */
-                tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/this->cpu_power;
-                pwr_move += this->cpu_power*min(this->cpu_power, this_load + tmp);
-                pwr_move /= SCHED_LOAD_SCALE;
-
-                /* Move if we gain another 8th of a CPU worth of throughput */
-                if (pwr_move < pwr_now + SCHED_LOAD_SCALE / 8)
-                        goto out_balanced;
-                *imbalance = 1;
-                return busiest;
-        }
 
         /* How many tasks to actually move to equalise the imbalance */
         *imbalance = (*imbalance * min(busiest->cpu_power, this->cpu_power))
@@ -1492,14 +1450,15 @@ out_balanced:
  */
 static runqueue_t *find_busiest_queue(struct sched_group *group)
 {
-        cpumask_t tmp;
         unsigned long load, max_load = 0;
         runqueue_t *busiest = NULL;
+        cpumask_t tmp;
         int i;
 
         cpus_and(tmp, group->cpumask, cpu_online_map);
+
         for_each_cpu_mask(i, tmp) {
-                load = get_low_cpu_load(i);
+                load = cpu_load(i);
 
                 if (load >= max_load) {
                         max_load = load;
@@ -1520,8 +1479,8 @@ static int load_balance(int this_cpu, ru
                         struct sched_domain *sd, enum idle_type idle)
 {
         struct sched_group *group;
-        runqueue_t *busiest;
         unsigned long imbalance;
+        runqueue_t *busiest;
         int nr_moved;
 
         spin_lock(&this_rq->lock);
@@ -1529,26 +1488,19 @@ static int load_balance(int this_cpu, ru
         group = find_busiest_group(sd, this_cpu, &imbalance, idle);
         if (!group)
                 goto out_balanced;
-
         busiest = find_busiest_queue(group);
-        if (!busiest)
-                goto out_balanced;
-        if (unlikely(busiest == this_rq)) {
-                WARN_ON(1);
+        if (!busiest || busiest == this_rq)
                 goto out_balanced;
-        }
 
         /* Attempt to move tasks */
         double_lock_balance(this_rq, busiest);
-
         nr_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle);
         spin_unlock(&this_rq->lock);
         spin_unlock(&busiest->lock);
 
         if (!nr_moved) {
                 sd->nr_balance_failed++;
-
-                if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
+                if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries)) {
                         int wake = 0;
 
                         spin_lock(&busiest->lock);
@@ -1560,17 +1512,16 @@ static int load_balance(int this_cpu, ru
                         spin_unlock(&busiest->lock);
                         if (wake)
                                 wake_up_process(busiest->migration_thread);
-
                         /*
-                         * We've kicked active balancing, reset the failure
-                         * counter.
+                         * We've kicked active balancing, reset the
+                         * failure counter:
                          */
-                        sd->nr_balance_failed = sd->cache_nice_tries;
+                        sd->nr_balance_failed = 0;
                 }
         } else
                 sd->nr_balance_failed = 0;
 
-        /* We were unbalanced, so reset the balancing interval */
+        /* reset the balancing interval: */
        sd->balance_interval = sd->min_interval;
 
         return nr_moved;
@@ -1578,7 +1529,7 @@ static int load_balance(int this_cpu, ru
 out_balanced:
         spin_unlock(&this_rq->lock);
 
-        /* tune up the balancing interval */
+        /* tune up the balancing interval: */
         if (sd->balance_interval < sd->max_interval)
                 sd->balance_interval *= 2;
 
@@ -1631,14 +1582,11 @@ static inline void idle_balance(int this
         if (unlikely(cpu_is_offline(this_cpu)))
                 return;
 
-        for_each_domain(this_cpu, sd) {
-                if (sd->flags & SD_BALANCE_NEWIDLE) {
-                        if (load_balance_newidle(this_cpu, this_rq, sd)) {
+        for_each_domain(this_cpu, sd)
+                if (sd->flags & SD_BALANCE_NEWIDLE)
+                        if (load_balance_newidle(this_cpu, this_rq, sd))
                                 /* We've pulled tasks over so stop searching */
                                 break;
-                        }
-                }
-        }
 }
 
 /*
@@ -1651,19 +1599,18 @@ static inline void idle_balance(int this
  */
 static void active_load_balance(runqueue_t *busiest, int busiest_cpu)
 {
-        struct sched_domain *sd;
         struct sched_group *group, *busy_group;
+        struct sched_domain *sd;
         int i;
 
         if (busiest->nr_running <= 1)
                 return;
 
-        for_each_domain(busiest_cpu, sd) {
+        for_each_domain(busiest_cpu, sd)
                 if (cpu_isset(busiest->push_cpu, sd->span))
                         break;
-        }
 
-        if (!sd->parent || !cpu_isset(busiest->push_cpu, sd->span)) {
+        if (!sd->parent && !cpu_isset(busiest->push_cpu, sd->span)) {
                 WARN_ON(1);
                 return;
         }
@@ -1689,7 +1636,7 @@ static void active_load_balance(runqueue
                         push_cpu = i;
                         nr++;
                 }
-                if (nr == 0)
+                if (!nr)
                         goto next_group;
 
                 rq = cpu_rq(push_cpu);
@@ -1713,21 +1660,16 @@ next_group:
 /* Don't have all balancing operations going off at once */
 #define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
-                           enum idle_type idle)
+static void
+rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
 {
-        unsigned long old_load, this_load;
         unsigned long j = jiffies + CPU_OFFSET(this_cpu);
         struct sched_domain *sd;
 
         if (unlikely(cpu_is_offline(this_cpu)))
                 return;
 
-        /* Update our load */
-        old_load = this_rq->cpu_load;
-        this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
-        this_rq->cpu_load = (old_load + this_load) / 2;
-
+        /* Run through all this CPU's domains */
         for_each_domain(this_cpu, sd) {
                 unsigned long interval = sd->balance_interval;
 
@@ -1736,7 +1678,7 @@ static void rebalance_tick(int this_cpu,
 
                 /* scale ms to jiffies */
                 interval = MSEC_TO_JIFFIES(interval);
-                if (unlikely(interval == 0))
+                if (unlikely(!interval))
                         interval = 1;
 
                 if (j - sd->last_balance >= interval) {
@@ -1755,7 +1697,7 @@ static void rebalance_tick(int this_cpu,
 static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
 {
 }
 #endif
@@ -3404,10 +3346,10 @@ static void __init arch_init_sched_domai
 
         /* Set up groups */
         for (i = 0; i < MAX_NUMNODES; i++) {
-                cpumask_t tmp = node_to_cpumask(i);
-                cpumask_t nodemask;
                 struct sched_group *first_cpu = NULL, *last_cpu = NULL;
                 struct sched_group *node = &sched_group_nodes[i];
+                cpumask_t tmp = node_to_cpumask(i);
+                cpumask_t nodemask;
                 int j;
 
                 cpus_and(nodemask, tmp, cpu_possible_map);
@@ -3529,12 +3471,12 @@ void sched_domain_debug(void)
                         printk(" ");
                         printk("groups:");
                         do {
-                                if (group == NULL) {
+                                if (!group) {
                                         printk(" ERROR: NULL");
                                         break;
                                 }
 
-                                if (cpus_weight(group->cpumask) == 0)
+                                if (!cpus_weight(group->cpumask))
                                         printk(" ERROR empty group:");
 
                                 cpus_and(tmp, groupmask, group->cpumask);
@@ -3588,9 +3530,29 @@ void __init sched_init(void)
         for (i = 0; i < NR_CPUS; i++) {
                 prio_array_t *array;
 
 #ifdef CONFIG_SMP
-                struct sched_domain *domain;
-                domain = cpu_sched_domain(i);
-                memset(domain, 0, sizeof(struct sched_domain));
+                static struct sched_group __initdata sched_group_init[NR_CPUS];
+                struct sched_domain *sd;
+                struct sched_group *group;
+
+                /*
+                 * Create isolated, 1-CPU, no-balancing domains to avoid
+                 * special-cases during early bootup. Once topology info
+                 * is available later into the bootup, the architecture
+                 * sets up an optimal domain-hierarchy, in the
+                 * arch_init_sched_domains() function.
+                 */
+                sd = cpu_sched_domain(i);
+                memset(sd, 0, sizeof(struct sched_domain));
+                cpus_clear(sd->span);
+                cpu_set(i, sd->span);
+
+                group = sched_group_init + i;
+                group->next = group;
+                cpus_clear(group->cpumask);
+                cpu_set(i, group->cpumask);
+                group->cpu_power = SCHED_LOAD_SCALE;
+
+                sd->groups = group;
 #endif
                 rq = cpu_rq(i);
_