diff -urN linux-2.5.31/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.5.31/arch/i386/config.in	Sat Aug 10 21:41:25 2002
+++ linux/arch/i386/config.in	Sat Aug 24 22:07:10 2002
@@ -103,6 +103,7 @@
 	define_bool CONFIG_X86_TSC y
 	define_bool CONFIG_X86_GOOD_APIC y
 	define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+	define_bool CONFIG_X86_USE_SMP_BALANCE y
 fi
 if [ "$CONFIG_MK6" = "y" ]; then
 	define_int CONFIG_X86_L1_CACHE_SHIFT 5
diff -urN linux-2.5.31/include/asm-i386/processor.h linux/include/asm-i386/processor.h
--- linux-2.5.31/include/asm-i386/processor.h	Sat Aug 10 21:41:18 2002
+++ linux/include/asm-i386/processor.h	Sat Aug 24 22:07:10 2002
@@ -488,4 +488,6 @@
 
 #endif
 
+#define ARCH_HAS_SMP_BALANCE
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff -urN linux-2.5.31/include/asm-i386/smp_balance.h linux/include/asm-i386/smp_balance.h
--- linux-2.5.31/include/asm-i386/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/asm-i386/smp_balance.h	Sat Aug 24 22:26:15 2002
@@ -0,0 +1,55 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, we only use the code if CONFIG_MPENTIUM4 is set.
+ *
+ * Distributions may want to make this unconditional to support all
+ * x86 machines on one kernel.  The overhead in the non-P4 case is
+ * minimal while the benefit to SMP P4s is probably decent.
+ */
+#if defined(CONFIG_X86_USE_SMP_BALANCE)
+
+/*
+ * Find any idle processor package (i.e. both virtual processors are idle)
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
+	i = this_cpu;
+
+	for (i = (this_cpu + 1) % NR_CPUS;
+	     i != this_cpu;
+	     i = (i + 1) % NR_CPUS) {
+		int sibling = cpu_sibling_map[i];
+
+		if (idle_cpu(i) && idle_cpu(sibling))
+			return i;
+	}
+	return -1;	/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/* Special hack for hyperthreading */
+	if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+		int found;
+		struct runqueue *rq_target;
+
+		if ((found = find_idle_package(this_cpu)) >= 0 ) {
+			rq_target = cpu_rq(found);
+			resched_task(rq_target->idle);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#else
+#define arch_load_balance(x, y) (0)
+#endif
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urN linux-2.5.31/include/linux/smp_balance.h linux/include/linux/smp_balance.h
--- linux-2.5.31/include/linux/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/smp_balance.h	Sat Aug 24 22:10:00 2002
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y) (0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urN linux-2.5.31/kernel/sched.c linux/kernel/sched.c
--- linux-2.5.31/kernel/sched.c	Sat Aug 10 21:41:24 2002
+++ linux/kernel/sched.c	Sat Aug 24 22:10:00 2002
@@ -630,6 +630,8 @@
 	return nr_running;
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * find_busiest_queue - find the busiest runqueue.
  */
@@ -639,6 +641,12 @@
 	runqueue_t *busiest, *rq_src;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return NULL;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with