From: Nick Piggin

The following patch builds a scheduling description for the i386
architecture using cpu_sibling_map to set up SMT if CONFIG_SCHED_SMT is
set.

It could be made fancier and collapse degenerate domains at runtime
(i.e. one sibling per CPU, or one NUMA node in the machine); a rough
sketch of such a collapse pass is appended after the patch.

---

 arch/i386/Kconfig            |   10 +
 arch/i386/kernel/smpboot.c   |  229 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/processor.h |    5 
 3 files changed, 244 insertions(+)

diff -puN arch/i386/Kconfig~sched-domains-i386-ht arch/i386/Kconfig
--- 25/arch/i386/Kconfig~sched-domains-i386-ht	2004-01-24 16:20:44.000000000 -0800
+++ 25-akpm/arch/i386/Kconfig	2004-01-24 16:20:44.000000000 -0800
@@ -459,6 +459,16 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SMP
+	default off
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  cost of slightly increased overhead in some places. If unsure say
+	  N here.
+
 config PREEMPT
 	bool "Preemptible Kernel"
 	help
diff -puN arch/i386/kernel/smpboot.c~sched-domains-i386-ht arch/i386/kernel/smpboot.c
--- 25/arch/i386/kernel/smpboot.c~sched-domains-i386-ht	2004-01-24 16:20:44.000000000 -0800
+++ 25-akpm/arch/i386/kernel/smpboot.c	2004-01-24 16:20:44.000000000 -0800
@@ -38,6 +38,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1119,6 +1120,234 @@ static void __init smp_boot_cpus(unsigne
 	synchronize_tsc_bp();
 }
 
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	cpumask_t all_cpus = CPU_MASK_NONE;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+
+		cpu_set(i, all_cpus);
+	}
+
+	/* Set up domains */
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		int node = cpu_to_node(i);
+		cpumask_t nodemask = node_to_cpumask(node);
+
+		/* TODO: change me to SD_SIBLING_INIT */
+		*cpu_domain = SD_CPU_INIT;
+		cpu_domain->span = cpu_sibling_map[i];
+		cpu_domain->flags |= SD_FLAG_WAKE;
+		cpu_domain->cache_hot_time = 0;
+		cpu_domain->cache_nice_tries = 0;
+		cpu_domain->max_interval = 2;
+		cpu_domain->busy_factor = 8;
+		cpu_domain->imbalance_pct = 110;
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = nodemask;
+		phys_domain->flags |= SD_FLAG_IDLE;
+
+		*node_domain = SD_NODE_INIT;
+		node_domain->span = all_cpus;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpu->cpumask = CPU_MASK_NONE;
+			cpu_set(j, cpu->cpumask);
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		int j;
+		cpumask_t nodemask;
+		cpus_and(nodemask, node_to_cpumask(i), all_cpus);
+
+		first_cpu = last_cpu = NULL;
+		/* Set up physical groups */
+		for_each_cpu_mask(j, nodemask) {
+			struct sched_domain *cpu_domain = cpu_sched_domain(j);
+			struct sched_group *cpu = &sched_group_phys[j];
+
+			if (j != first_cpu(cpu_domain->span))
+				continue;
+
+			cpu->cpumask = cpu_domain->span;
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	/* Set up nodes */
+	first_cpu = last_cpu = NULL;
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct sched_group *cpu = &sched_group_nodes[i];
+		cpumask_t nodemask;
+		cpus_and(nodemask, node_to_cpumask(i), all_cpus);
+
+		if (cpus_empty(nodemask))
+			continue;
+
+		cpu->cpumask = nodemask;
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+
+	mb();
+	for_each_cpu_mask(i, all_cpus) {
+		int node = cpu_to_node(i);
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_domain *node_domain = &per_cpu(node_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		struct sched_group *node_group = &sched_group_nodes[node];
+
+		cpu_domain->parent = phys_domain;
+		phys_domain->parent = node_domain;
+
+		node_domain->groups = node_group;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#else /* CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	cpumask_t all_cpus = CPU_MASK_NONE;
+	struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+
+		cpu_set(i, all_cpus);
+	}
+
+	/* Set up domains */
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+		/* TODO: change me to SD_SIBLING_INIT */
+		*cpu_domain = SD_CPU_INIT;
+		cpu_domain->span = cpu_sibling_map[i];
+		cpu_domain->flags |= SD_FLAG_WAKE;
+		cpu_domain->cache_hot_time = 100000;
+		cpu_domain->cache_nice_tries = 0;
+		cpu_domain->max_interval = (HZ/500 ?: 1);
+		cpu_domain->busy_factor = 8;
+		cpu_domain->imbalance_pct = 110;
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = all_cpus;
+		phys_domain->flags |= SD_FLAG_IDLE;
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		int j;
+		first_cpu = last_cpu = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpu->cpumask = CPU_MASK_NONE;
+			cpu_set(j, cpu->cpumask);
+
+			if (!first_cpu)
+				first_cpu = cpu;
+			if (last_cpu)
+				last_cpu->next = cpu;
+			last_cpu = cpu;
+		}
+		last_cpu->next = first_cpu;
+	}
+
+	first_cpu = last_cpu = NULL;
+	/* Set up physical groups */
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+
+		if (!first_cpu)
+			first_cpu = cpu;
+		if (last_cpu)
+			last_cpu->next = cpu;
+		last_cpu = cpu;
+	}
+	last_cpu->next = first_cpu;
+
+	mb();
+	for_each_cpu_mask(i, all_cpus) {
+		struct sched_domain *cpu_domain = cpu_sched_domain(i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+		struct sched_group *cpu_group = &sched_group_cpus[i];
+		struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+		cpu_domain->parent = phys_domain;
+		phys_domain->groups = phys_group;
+		cpu_domain->groups = cpu_group;
+	}
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
+
 /* These are wrappers to interface to the new boot process.  Someone
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
diff -puN include/asm-i386/processor.h~sched-domains-i386-ht include/asm-i386/processor.h
--- 25/include/asm-i386/processor.h~sched-domains-i386-ht	2004-01-24 16:20:44.000000000 -0800
+++ 25-akpm/include/asm-i386/processor.h	2004-01-24 16:20:44.000000000 -0800
@@ -630,4 +630,9 @@ extern inline void prefetchw(const void
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_BALANCE
+#endif
+
 #endif /* __ASM_I386_PROCESSOR_H */
_
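
For reference, here is a rough, untested sketch of the degenerate-domain
collapse mentioned at the top.  It is not part of the patch; it assumes the
sched_domain/sched_group definitions and the cpu_sched_domain() accessor used
above, plus the cpus_equal() cpumask helper, and the function name is made up.
It only handles the redundant-parent case (e.g. a single NUMA node, where the
node domain spans exactly the same CPUs as the physical domain below it);
folding away a one-sibling bottom-level domain would additionally need the
parent copied down into the per-cpu domain, which is omitted here.

/*
 * Sketch only: splice out parent domains that span exactly the same CPUs
 * as their child.  In the hierarchy built above such a parent ends up with
 * a single group, so balancing across it is pure overhead.  Would run once,
 * after arch_init_sched_domains().
 */
static void __init arch_collapse_degenerate_domains(void)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		struct sched_domain *sd, *parent;

		if (!cpu_possible(i))
			continue;

		sd = cpu_sched_domain(i);
		parent = sd->parent;
		while (parent) {
			if (cpus_equal(parent->span, sd->span)) {
				/* Parent covers no extra CPUs: unlink it and
				 * look at the next level up. */
				sd->parent = parent->parent;
			} else {
				/* Parent is useful, move up one level. */
				sd = parent;
			}
			parent = sd->parent;
		}
	}
}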