diff -urN linux-2.4.18/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.4.18/arch/i386/config.in	Tue Aug 20 17:20:33 2002
+++ linux/arch/i386/config.in	Sat Aug 24 23:17:30 2002
@@ -105,6 +105,7 @@
 	define_bool CONFIG_X86_GOOD_APIC y
 	define_bool CONFIG_X86_PGE y
 	define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+	define_bool CONFIG_X86_USE_SMP_BALANCE y
 fi
 if [ "$CONFIG_MPENTIUM4" = "y" ]; then
 	define_int CONFIG_X86_L1_CACHE_SHIFT 7
diff -urN linux-2.4.18/include/asm-i386/processor.h linux/include/asm-i386/processor.h
--- linux-2.4.18/include/asm-i386/processor.h	Tue Aug 20 17:21:14 2002
+++ linux/include/asm-i386/processor.h	Sat Aug 24 23:17:30 2002
@@ -506,4 +506,6 @@
 
 #endif
 
+#define ARCH_HAS_SMP_BALANCE
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff -urN linux-2.4.18/include/asm-i386/smp_balance.h linux/include/asm-i386/smp_balance.h
--- linux-2.4.18/include/asm-i386/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/asm-i386/smp_balance.h	Sat Aug 24 23:20:22 2002
@@ -0,0 +1,56 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, we use the code only if CONFIG_MPENTIUM4 is set.
+ *
+ * Distributions may want to make this unconditional to support all
+ * x86 machines on one kernel.  The overhead in the non-P4 case is
+ * minimal while the benefit to SMP P4s is probably decent.
+ */
+#if defined(CONFIG_X86_USE_SMP_BALANCE)
+
+/*
+ * Find any idle processor package (i.e. both virtual processors are idle)
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
+	this_cpu = cpu_number_map(this_cpu);
+
+	for (i = (this_cpu + 1) % smp_num_cpus;
+	     i != this_cpu;
+	     i = (i + 1) % smp_num_cpus) {
+		int physical = cpu_logical_map(i);
+		int sibling = cpu_sibling_map[physical];
+
+		if (idle_cpu(physical) && idle_cpu(sibling))
+			return physical;
+	}
+	return -1;			/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/* Special hack for hyperthreading */
+	if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+		int found;
+		struct runqueue *rq_target;
+
+		if ((found = find_idle_package(this_cpu)) >= 0) {
+			rq_target = cpu_rq(found);
+			resched_task(rq_target->idle);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#else
+#define arch_load_balance(x, y)	(0)
+#endif
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urN linux-2.4.18/include/linux/smp_balance.h linux/include/linux/smp_balance.h
--- linux-2.4.18/include/linux/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/smp_balance.h	Sat Aug 24 23:14:25 2002
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y)	(0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urN linux-2.4.18/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.18/kernel/sched.c	Tue Aug 20 17:21:19 2002
+++ linux/kernel/sched.c	Sat Aug 24 23:17:16 2002
@@ -487,6 +487,12 @@
 	list_t *head, *curr;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with
@@ -621,6 +627,8 @@
 	spin_unlock(&busiest->lock);
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * One of the idle_cpu_tick() or the busy_cpu_tick() function will
  * gets called every timer tick, on every CPU. Our balancing action
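For readers who want to trace the logic outside the kernel tree, here is a
minimal standalone sketch of the wrap-around scan that find_idle_package()
performs.  This is a userspace approximation, not kernel code: the arrays
logical_of[], physical_of[], sibling_of[] and is_idle[] are hypothetical
stand-ins for the kernel's cpu_number_map(), cpu_logical_map(),
cpu_sibling_map[] and idle_cpu().

	/*
	 * Sketch (userspace, not kernel code): scan every other logical
	 * CPU once, wrapping around, looking for a package whose two
	 * siblings are both idle.
	 */
	#include <stdio.h>

	#define NCPUS 4				/* 2 packages x 2 siblings */

	static int logical_of[NCPUS]  = { 0, 1, 2, 3 };	/* cpu_number_map() stub */
	static int physical_of[NCPUS] = { 0, 1, 2, 3 };	/* cpu_logical_map() stub */
	static int sibling_of[NCPUS]  = { 1, 0, 3, 2 };	/* cpu_sibling_map[] stub */
	static int is_idle[NCPUS]     = { 1, 0, 1, 1 };	/* idle_cpu() stub */

	static int find_idle_package(int this_cpu)
	{
		int i, start = logical_of[this_cpu];

		for (i = (start + 1) % NCPUS; i != start; i = (i + 1) % NCPUS) {
			int phys = physical_of[i];

			if (is_idle[phys] && is_idle[sibling_of[phys]])
				return phys;	/* whole package is idle */
		}
		return -1;			/* not found */
	}

	int main(void)
	{
		/*
		 * CPU 0 is idle but its sibling (CPU 1) is busy, the
		 * situation the patch targets; the scan finds the fully
		 * idle package holding CPUs 2 and 3.
		 */
		printf("idle package found at physical CPU %d\n",
		       find_idle_package(0));
		return 0;
	}

Note the design choice in arch_load_balance() itself: rather than migrating
a task directly, it calls resched_task() on the idle thread of the package
it found, which presumably lets that CPU wake up, reschedule, and pull work
over through the normal load_balance() path.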