diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/processor.h x/include/asm-i386/processor.h
--- x-ref/include/asm-i386/processor.h	2003-04-05 05:12:20.000000000 +0200
+++ x/include/asm-i386/processor.h	2003-04-05 05:12:36.000000000 +0200
@@ -487,6 +487,8 @@ static inline void rep_nop(void)
 
 #define cpu_relax()	rep_nop()
 
+#define ARCH_HAS_SMP_BALANCE
+
 /* Prefetch instructions for Pentium III and AMD Athlon */
 #if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4)
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/asm-i386/smp_balance.h x/include/asm-i386/smp_balance.h
--- x-ref/include/asm-i386/smp_balance.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/asm-i386/smp_balance.h	2003-04-05 05:12:28.000000000 +0200
@@ -0,0 +1,48 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, maybe this should be conditional on CONFIG_MPENTIUM4...
+ *
+ */
+
+/*
+ * Find any idle processor package (i.e. both virtual processors are idle)
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
+	this_cpu = cpu_number_map(this_cpu);
+
+	for (i = (this_cpu + 1) % smp_num_cpus;
+	     i != this_cpu;
+	     i = (i + 1) % smp_num_cpus) {
+		int physical = cpu_logical_map(i);
+		int sibling = cpu_sibling_map[physical];
+
+		if (idle_cpu(physical) && idle_cpu(sibling))
+			return physical;
+	}
+	return -1;			/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/* Special hack for hyperthreading */
+	if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+		int found;
+		struct runqueue *rq_target;
+
+		if ((found = find_idle_package(this_cpu)) >= 0) {
+			rq_target = cpu_rq(found);
+			resched_task(rq_target->idle);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/sched.h x/include/linux/sched.h
--- x-ref/include/linux/sched.h	2003-04-05 05:12:24.000000000 +0200
+++ x/include/linux/sched.h	2003-04-05 05:12:28.000000000 +0200
@@ -145,6 +145,7 @@ typedef struct task_struct task_t;
 
 extern void sched_init(void);
 extern void init_idle(task_t *idle, int cpu);
+extern int idle_cpu(int cpu);
 extern void show_state(void);
 extern void cpu_init (void);
 extern void trap_init(void);
diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/smp_balance.h x/include/linux/smp_balance.h
--- x-ref/include/linux/smp_balance.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/linux/smp_balance.h	2003-04-05 05:12:28.000000000 +0200
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y)	(0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urNp --exclude CVS --exclude BitKeeper x-ref/kernel/sched.c x/kernel/sched.c
--- x-ref/kernel/sched.c	2003-04-05 05:12:23.000000000 +0200
+++ x/kernel/sched.c	2003-04-05 05:12:28.000000000 +0200
@@ -473,6 +473,11 @@ unsigned long nr_context_switches(void)
 	return sum;
 }
 
+inline int idle_cpu(int cpu)
+{
+	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
+}
+
 #if CONFIG_SMP
 /*
  * Lock the busiest runqueue as well, this_rq is locked already.
@@ -516,6 +521,8 @@ static inline void pull_task(runqueue_t
 	set_need_resched();
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * Current runqueue is empty, or rebalance tick: if there is an
  * imbalance (current runqueue is too short) then pull from
@@ -534,6 +541,12 @@ static void load_balance(runqueue_t *thi
 	list_t *head, *curr;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with
@@ -1098,11 +1111,6 @@ int task_nice(task_t *p)
 	return TASK_NICE(p);
 }
 
-int idle_cpu(int cpu)
-{
-	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
-}
-
 static inline task_t *find_process_by_pid(pid_t pid)
 {
 	return pid ? find_task_by_pid(pid) : current;
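
For illustration, the ring scan in find_idle_package() can be exercised outside the kernel. The following is a minimal userspace sketch, not kernel code: the cpu_number_map()/cpu_logical_map() indirection is assumed to be the identity, and the sibling map and idle state are hypothetical stand-in arrays rather than the kernel's real data structures.

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical stand-ins for the kernel's per-CPU state: CPUs 0/2
 * and 1/3 are HT siblings, and a nonzero entry in cpu_idle[] means
 * that virtual CPU is running its idle task. */
static const int cpu_sibling[NR_CPUS] = { 2, 3, 0, 1 };
static const int cpu_idle[NR_CPUS]    = { 0, 1, 0, 1 };

/* Same ring scan as the patch's find_idle_package(): start at the
 * CPU after this_cpu, wrap around, stop before revisiting this_cpu. */
static int find_idle_package(int this_cpu)
{
	int i;

	for (i = (this_cpu + 1) % NR_CPUS;
	     i != this_cpu;
	     i = (i + 1) % NR_CPUS) {
		if (cpu_idle[i] && cpu_idle[cpu_sibling[i]])
			return i;
	}
	return -1;	/* no fully idle package */
}

int main(void)
{
	/* CPUs 1 and 3 are siblings and both idle, so a scan starting
	 * from CPU 0 reports CPU 1. */
	printf("idle package found at cpu %d\n", find_idle_package(0));
	return 0;
}

Starting the scan at the CPU after this_cpu, rather than always at 0, means callers on different CPUs begin probing at different packages, so idle rebalances don't all converge on the lowest-numbered idle package.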
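
The <linux/smp_balance.h> wrapper is the usual arch-opt-in pattern: an architecture that defines ARCH_HAS_SMP_BALANCE (as the processor.h hunk does for i386) supplies its own arch_load_balance(), and everyone else gets the (0) macro, so the new test at the top of load_balance() compiles away to nothing. Below is a self-contained sketch of that pattern, with printf() calls standing in for the real balancing work (hypothetical, for illustration only):

#include <stdio.h>

/* An architecture opts in by defining this (the processor.h hunk
 * above does so for i386); comment it out to exercise the no-op
 * fallback that <linux/smp_balance.h> provides. */
#define ARCH_HAS_SMP_BALANCE

#ifdef ARCH_HAS_SMP_BALANCE
/* Hypothetical arch policy: intervene on idle rebalances only. */
static int arch_load_balance(int this_cpu, int idle)
{
	(void)this_cpu;		/* unused in this sketch */
	return idle;
}
#else
#define arch_load_balance(x, y)	(0)
#endif

static void load_balance(int this_cpu, int idle)
{
	if (arch_load_balance(this_cpu, idle)) {
		printf("cpu %d: handled by arch balancer\n", this_cpu);
		return;
	}
	printf("cpu %d: generic balance path\n", this_cpu);
}

int main(void)
{
	load_balance(0, 1);	/* idle rebalance: arch hook fires */
	load_balance(0, 0);	/* busy rebalance: generic path */
	return 0;
}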