diff -urNp ref/include/asm-i386/processor.h 2.4.20pre5aa1/include/asm-i386/processor.h
--- ref/include/asm-i386/processor.h	Fri Aug 30 06:30:17 2002
+++ 2.4.20pre5aa1/include/asm-i386/processor.h	Fri Aug 30 06:32:05 2002
@@ -486,6 +486,8 @@ static inline void rep_nop(void)
 
 #define cpu_relax()	rep_nop()
 
+#define ARCH_HAS_SMP_BALANCE
+
 /* Prefetch instructions for Pentium III and AMD Athlon */
 #ifdef CONFIG_MPENTIUMIII
 
diff -urNp ref/include/asm-i386/smp_balance.h 2.4.20pre5aa1/include/asm-i386/smp_balance.h
--- ref/include/asm-i386/smp_balance.h	Thu Jan  1 01:00:00 1970
+++ 2.4.20pre5aa1/include/asm-i386/smp_balance.h	Fri Aug 30 06:32:01 2002
@@ -0,0 +1,48 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, maybe this should be conditional on CONFIG_MPENTIUM4...
+ *
+ */
+
+/*
+ * Find any idle processor package (i.e. both virtual processors are idle)
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
+	this_cpu = cpu_number_map(this_cpu);
+
+	for (i = (this_cpu + 1) % smp_num_cpus;
+	     i != this_cpu;
+	     i = (i + 1) % smp_num_cpus) {
+		int physical = cpu_logical_map(i);
+		int sibling = cpu_sibling_map[physical];
+
+		if (idle_cpu(physical) && idle_cpu(sibling))
+			return physical;
+	}
+	return -1;			/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/* Special hack for hyperthreading */
+	if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+		int found;
+		struct runqueue *rq_target;
+
+		if ((found = find_idle_package(this_cpu)) >= 0 ) {
+			rq_target = cpu_rq(found);
+			resched_task(rq_target->idle);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urNp ref/include/linux/sched.h 2.4.20pre5aa1/include/linux/sched.h
--- ref/include/linux/sched.h	Fri Aug 30 06:30:22 2002
+++ 2.4.20pre5aa1/include/linux/sched.h	Fri Aug 30 06:30:26 2002
@@ -145,6 +145,7 @@ typedef struct task_struct task_t;
 
 extern void sched_init(void);
 extern void init_idle(task_t *idle, int cpu);
+extern int idle_cpu(int cpu);
 extern void show_state(void);
 extern void cpu_init (void);
 extern void trap_init(void);
diff -urNp ref/include/linux/smp_balance.h 2.4.20pre5aa1/include/linux/smp_balance.h
--- ref/include/linux/smp_balance.h	Thu Jan  1 01:00:00 1970
+++ 2.4.20pre5aa1/include/linux/smp_balance.h	Fri Aug 30 06:30:48 2002
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y)	(0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urNp ref/kernel/sched.c 2.4.20pre5aa1/kernel/sched.c
--- ref/kernel/sched.c	Fri Aug 30 06:30:22 2002
+++ 2.4.20pre5aa1/kernel/sched.c	Fri Aug 30 06:30:48 2002
@@ -473,6 +473,11 @@ unsigned long nr_context_switches(void)
 	return sum;
 }
+inline int idle_cpu(int cpu)
+{
+	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+}
+
 #if CONFIG_SMP
 /*
  * Lock the busiest runqueue as well, this_rq is locked already.
  */
@@ -516,6 +521,8 @@ static inline void pull_task(runqueue_t
 		set_need_resched();
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * Current runqueue is empty, or rebalance tick: if there is an
  * inbalance (current runqueue is too short) then pull from
@@ -534,6 +541,12 @@ static void load_balance(runqueue_t *thi
 	list_t *head, *curr;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with
@@ -1098,11 +1111,6 @@ int task_nice(task_t *p)
 	return TASK_NICE(p);
 }
 
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
 static inline task_t *find_process_by_pid(pid_t pid)
 {
 	return pid ? find_task_by_pid(pid) : current;
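
For anyone who wants to poke at the package-selection logic without building the tree, here is a small user-space sketch of what find_idle_package() and the arch_load_balance() trigger above do. It is only an illustration: the sibling[] and cpu_idle[] tables are hypothetical stand-ins for cpu_sibling_map[] and idle_cpu(), and logical/physical CPU numbering is assumed to be identical, so cpu_number_map()/cpu_logical_map() are left out.

/* ht_balance_demo.c -- user-space illustration only, not kernel code.
 * The tables below are made-up stand-ins for cpu_sibling_map[] and
 * idle_cpu(); logical == physical CPU numbering is assumed.
 */
#include <stdio.h>

#define NR_CPUS 4

/* sibling[i] is the other logical CPU in the same physical package */
static const int sibling[NR_CPUS] = { 1, 0, 3, 2 };

/* 1 if that CPU is currently running its idle task */
static int cpu_idle[NR_CPUS] = { 1, 0, 1, 1 };

/* Walk the other CPUs and return one whose whole package is idle,
 * mirroring find_idle_package(); -1 means no fully idle package. */
static int find_idle_package(int this_cpu)
{
	int i;

	for (i = (this_cpu + 1) % NR_CPUS; i != this_cpu; i = (i + 1) % NR_CPUS)
		if (cpu_idle[i] && cpu_idle[sibling[i]])
			return i;
	return -1;
}

/* The trigger condition from arch_load_balance(): only bother when this
 * CPU is idle while its sibling is busy, i.e. the package is half used. */
static int should_spread_load(int this_cpu)
{
	return cpu_idle[this_cpu] && !cpu_idle[sibling[this_cpu]];
}

int main(void)
{
	int cpu = 0;
	int target;

	if (should_spread_load(cpu)) {
		target = find_idle_package(cpu);
		if (target >= 0)
			printf("CPU %d: would resched the idle task on CPU %d\n",
			       cpu, target);
		else
			printf("CPU %d: no fully idle package, fall through\n", cpu);
	}
	return 0;
}

With the sample tables it prints that CPU 0 would kick CPU 2: the idle sibling of a busy CPU declines to steal work itself (arch_load_balance() returns 1, short-circuiting the generic balancer) and instead pokes a completely idle package, so two runnable tasks end up on separate packages rather than sharing one package's execution resources.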