From: Dimitri Sivanich

Here's a version of the isolated scheduler domain code that I mentioned
in an RFC on 7/22.

This patch applies on top of 2.6.8-rc2-mm1 (to include all of the new
arch_init_sched_domains() code).  It also contains the two-line fix that
Jesse sent in earlier, removing the check of
first_cpu(sd->groups->cpumask).

Note that this has not been tested with CONFIG_SCHED_SMT.  I hope that my
handling of those cases is OK.

Signed-off-by: Dimitri Sivanich
Signed-off-by: Andrew Morton
---

 25-sparc64-akpm/kernel/sched.c |   81 ++++++++++++++++++++++++++++++++++++++---
 1 files changed, 76 insertions(+), 5 deletions(-)

diff -puN kernel/sched.c~sched-isolated-sched-domains kernel/sched.c
--- 25-sparc64/kernel/sched.c~sched-isolated-sched-domains	2004-07-31 20:57:13.913144840 -0700
+++ 25-sparc64-akpm/kernel/sched.c	2004-07-31 20:57:13.919143928 -0700
@@ -3710,6 +3710,31 @@ __init static int cpu_to_node_group(int
 }
 #endif
 
+/* Groups for isolated scheduling domains */
+static struct sched_group sched_group_isolated[NR_CPUS];
+__init static int cpu_to_isolated_group(int cpu)
+{
+	return cpu;
+}
+
+cpumask_t __initdata cpu_isolated_map = CPU_MASK_NONE;	/* cpus with isolated domains */
+
+/* Setup the mask of cpus configured for isolated domains */
+static int __init
+isolated_cpu_setup(char *str)
+{
+	int ints[NR_CPUS], i;
+
+	str = get_options(str, ARRAY_SIZE(ints), ints);
+	cpus_clear(cpu_isolated_map);
+	for (i=1; i<=ints[0]; i++) {
+		cpu_set(ints[i], cpu_isolated_map);
+	}
+	return 1;
+}
+
+__setup ("isolcpus=", isolated_cpu_setup);
+
 /*
  * init_sched_build_groups takes an array of groups, the cpumask we wish
  * to span, and a pointer to a function which identifies what group a CPU
@@ -3758,6 +3783,15 @@ __init static void init_sched_build_grou
 __init static void arch_init_sched_domains(void)
 {
 	int i;
+	cpumask_t cpu_default_map;
+
+	/*
+	 * Setup mask for cpus without special case scheduling requirements.
+	 * For now this just excludes isolated cpus, but could be used to
+	 * exclude other special cases in the future.
+	 */
+	cpus_complement(cpu_default_map, cpu_isolated_map);
+	cpus_and(cpu_default_map, cpu_default_map, cpu_possible_map);
 
 	/* Set up domains */
 	for_each_cpu(i) {
@@ -3765,12 +3799,37 @@ __init static void arch_init_sched_domai
 		struct sched_domain *sd = NULL, *p;
 		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
 
+		cpus_and(nodemask, nodemask, cpu_default_map);
+
+		/*
+		 * Set up isolated domains.
+		 * Unlike those of other cpus, the domains and groups are
+		 * single level, and span a single cpu.
+		 */
+		if (cpu_isset(i, cpu_isolated_map)) {
+#ifdef CONFIG_SCHED_SMT
+			sd = &per_cpu(cpu_domains, i);
+#else
+			sd = &per_cpu(phys_domains, i);
+#endif
+			group = cpu_to_isolated_group(i);
+			*sd = SD_CPU_INIT;
+			cpu_set(i, sd->span);
+			sd->balance_interval = INT_MAX;	/* Don't balance */
+			sd->flags = 0;			/* Avoid WAKE_ */
+			sd->groups = &sched_group_isolated[group];
+			printk(KERN_INFO "Setting up cpu %d isolated.\n", i);
+			/* Single level, so continue with next cpu */
+			continue;
+		}
+
 #ifdef CONFIG_NUMA
 		sd = &per_cpu(node_domains, i);
 		group = cpu_to_node_group(i);
 		*sd = SD_NODE_INIT;
 		/* FIXME: should be multilevel, in arch code */
 		sd->span = sched_domain_node_span(i, SD_NODES_PER_DOMAIN);
+		cpus_and(sd->span, sd->span, cpu_default_map);
 		sd->groups = &sched_group_nodes[group];
 #endif
 
@@ -3788,6 +3847,7 @@ __init static void arch_init_sched_domai
 		group = cpu_to_cpu_group(i);
 		*sd = SD_SIBLING_INIT;
 		sd->span = cpu_sibling_map[i];
+		cpus_and(sd->span, sd->span, cpu_default_map);
 		sd->parent = p;
 		sd->groups = &sched_group_cpus[group];
 #endif
@@ -3796,19 +3856,30 @@ __init static void arch_init_sched_domai
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i) {
-		if (i != first_cpu(cpu_sibling_map[i]))
+		cpumask_t this_sibling_map = cpu_sibling_map[i];
+		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
+		if (i != first_cpu(this_sibling_map))
 			continue;
 
-		init_sched_build_groups(sched_group_cpus, cpu_sibling_map[i],
+		init_sched_build_groups(sched_group_cpus, this_sibling_map,
 						&cpu_to_cpu_group);
 	}
 #endif
 
+	/* Set up isolated groups */
+	for_each_cpu_mask(i, cpu_isolated_map) {
+		cpumask_t mask;
+		cpus_clear(mask);
+		cpu_set(i, mask);
+		init_sched_build_groups(sched_group_isolated, mask,
+						&cpu_to_isolated_group);
+	}
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
 
-		cpus_and(nodemask, nodemask, cpu_possible_map);
+		cpus_and(nodemask, nodemask, cpu_default_map);
 		if (cpus_empty(nodemask))
 			continue;
 
@@ -3818,12 +3889,12 @@ __init static void arch_init_sched_domai
 
 #ifdef CONFIG_NUMA
 	/* Set up node groups */
-	init_sched_build_groups(sched_group_nodes, cpu_possible_map,
+	init_sched_build_groups(sched_group_nodes, cpu_default_map,
 					&cpu_to_node_group);
 #endif
 
 	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu(i) {
+	for_each_cpu_mask(i, cpu_default_map) {
 		int power;
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
_
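
For reference (not part of the patch): cpus are isolated by appending the
new boot option to the kernel command line, e.g.

	isolcpus=2,3

which isolated_cpu_setup() above parses into cpu_isolated_map.  Since an
isolated cpu sits in a single-cpu domain with balancing disabled, nothing
is migrated onto it by the load balancer; a task has to be placed there
explicitly.  Below is a minimal userspace sketch of doing that with
sched_setaffinity() - the cpu number is only an example, and the glibc
wrapper shown is the cpu_set_t flavor of the call:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	cpu_set_t mask;

	/* Run only on cpu 2, assuming the box was booted with isolcpus=2,3 */
	CPU_ZERO(&mask);
	CPU_SET(2, &mask);

	/* pid 0 means the calling task; once bound it cannot be moved away */
	if (sched_setaffinity(0, sizeof(mask), &mask) < 0) {
		perror("sched_setaffinity");
		exit(1);
	}

	/* ... latency-sensitive work runs here, undisturbed by balancing ... */
	return 0;
}

Tasks running elsewhere are unaffected; the isolated cpu simply drops out
of the default balancing domains built above.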