From: Andi Kleen Set up SMT for the domain scheduler on x86-64. This way the scheduling works better on HyperThreading-aware systems; in particular it will use both physical CPUs before sharing two virtual CPUs on the same package. This improves performance considerably in some cases. Based on the i386 code and a previous patch from Suresh B. Siddha. --- 25-akpm/arch/x86_64/Kconfig | 10 +++ 25-akpm/arch/x86_64/kernel/Makefile | 1 25-akpm/arch/x86_64/kernel/domain.c | 89 +++++++++++++++++++++++++++++++++ 25-akpm/include/asm-x86_64/processor.h | 5 + 4 files changed, 105 insertions(+) diff -puN arch/x86_64/Kconfig~sched-x86_64-sched-domains-support arch/x86_64/Kconfig --- 25/arch/x86_64/Kconfig~sched-x86_64-sched-domains-support 2004-05-10 01:12:39.625564760 -0700 +++ 25-akpm/arch/x86_64/Kconfig 2004-05-10 01:12:39.634563392 -0700 @@ -239,6 +239,16 @@ config PREEMPT Say Y here if you are feeling brave and building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default n + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + # someone write a better help text please. config K8_NUMA bool "K8 NUMA support" diff -puN /dev/null arch/x86_64/kernel/domain.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/domain.c 2004-05-10 01:12:39.633563544 -0700 @@ -0,0 +1,89 @@ +#include <linux/init.h> +#include <linux/sched.h> + +/* Don't do any NUMA setup on Opteron right now. They seem to be + better off with flat scheduling. This is just for SMT. 
*/ + +#ifdef CONFIG_SCHED_SMT + +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_possible_map; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first = last = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + first = last = NULL; + /* Set up physical groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* + * Make each extra sibling increase power by 10% of + * the basic CPU. This is very arbitrary. 
+ */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} + +#endif diff -puN arch/x86_64/kernel/Makefile~sched-x86_64-sched-domains-support arch/x86_64/kernel/Makefile --- 25/arch/x86_64/kernel/Makefile~sched-x86_64-sched-domains-support 2004-05-10 01:12:39.626564608 -0700 +++ 25-akpm/arch/x86_64/kernel/Makefile 2004-05-10 01:12:39.633563544 -0700 @@ -25,6 +25,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_prin obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_SCHED_SMT) += domain.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KGDB) += kgdb_stub.o diff -puN include/asm-x86_64/processor.h~sched-x86_64-sched-domains-support include/asm-x86_64/processor.h --- 25/include/asm-x86_64/processor.h~sched-x86_64-sched-domains-support 2004-05-10 01:12:39.628564304 -0700 +++ 25-akpm/include/asm-x86_64/processor.h 2004-05-10 01:12:39.632563696 -0700 @@ -456,4 +456,9 @@ static inline void __mwait(unsigned long #define cache_line_size() (boot_cpu_data.x86_cache_alignment) +#ifdef CONFIG_SCHED_SMT +#define ARCH_HAS_SCHED_DOMAIN +#define ARCH_HAS_SCHED_WAKE_IDLE +#endif + #endif /* __ASM_X86_64_PROCESSOR_H */ _