diff -urN linux-2.5.31/arch/i386/config.in linux/arch/i386/config.in
--- linux-2.5.31/arch/i386/config.in	Sat Aug 10 21:41:25 2002
+++ linux/arch/i386/config.in	Sat Aug 24 22:07:10 2002
@@ -103,6 +103,7 @@
    define_bool CONFIG_X86_TSC y
    define_bool CONFIG_X86_GOOD_APIC y
    define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+   define_bool CONFIG_X86_USE_SMP_BALANCE y
 fi
 if [ "$CONFIG_MK6" = "y" ]; then
    define_int  CONFIG_X86_L1_CACHE_SHIFT 5
diff -urN linux-2.5.31/include/asm-i386/processor.h linux/include/asm-i386/processor.h
--- linux-2.5.31/include/asm-i386/processor.h	Sat Aug 10 21:41:18 2002
+++ linux/include/asm-i386/processor.h	Sat Aug 24 22:07:10 2002
@@ -488,4 +488,6 @@
 
 #endif
 
+#define ARCH_HAS_SMP_BALANCE
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff -urN linux-2.5.31/include/asm-i386/smp_balance.h linux/include/asm-i386/smp_balance.h
--- linux-2.5.31/include/asm-i386/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/asm-i386/smp_balance.h	Sat Aug 24 22:26:15 2002
@@ -0,0 +1,55 @@
+#ifndef _ASM_SMP_BALANCE_H
+#define _ASM_SMP_BALANCE_H
+
+/*
+ * We have an architecture-specific SMP load balancer to improve
+ * scheduling behavior on hyperthreaded CPUs.  Since only P4s have
+ * HT, we only use the code if CONFIG_MPENTIUM4 is set.
+ *
+ * Distributions may want to make this unconditional to support all
+ * x86 machines on one kernel.  The overhead in the non-P4 case is
+ * minimal while the benefit to SMP P4s is probably decent.
+ */
+#if defined(CONFIG_X86_USE_SMP_BALANCE)
+
+/*
+ * Find any idle processor package (i.e. both virtual processors are idle)
+ */
+static inline int find_idle_package(int this_cpu)
+{
+	int i;
+
+	i = this_cpu;
+
+	for (i = (this_cpu + 1) % NR_CPUS;
+	     i != this_cpu;
+	     i = (i + 1) % NR_CPUS) {
+		int sibling = cpu_sibling_map[i];
+
+		if (idle_cpu(i) && idle_cpu(sibling))
+			return i;
+	}
+	return -1;	/* not found */
+}
+
+static inline int arch_load_balance(int this_cpu, int idle)
+{
+	/* Special hack for hyperthreading */
+       if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+               int found;
+               struct runqueue *rq_target;
+
+               if ((found = find_idle_package(this_cpu)) >= 0 ) {
+                       rq_target = cpu_rq(found);
+                       resched_task(rq_target->idle);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+#else
+#define arch_load_balance(x, y)		(0)
+#endif
+
+#endif /* _ASM_SMP_BALANCE_H */
diff -urN linux-2.5.31/include/linux/smp_balance.h linux/include/linux/smp_balance.h
--- linux-2.5.31/include/linux/smp_balance.h	Wed Dec 31 19:00:00 1969
+++ linux/include/linux/smp_balance.h	Sat Aug 24 22:10:00 2002
@@ -0,0 +1,14 @@
+#ifndef _LINUX_SMP_BALANCE_H
+#define _LINUX_SMP_BALANCE_H
+
+/*
+ * per-architecture load balancing logic, e.g. for hyperthreading
+ */
+
+#ifdef ARCH_HAS_SMP_BALANCE
+#include <asm/smp_balance.h>
+#else
+#define arch_load_balance(x, y)		(0)
+#endif
+
+#endif /* _LINUX_SMP_BALANCE_H */
diff -urN linux-2.5.31/kernel/sched.c linux/kernel/sched.c
--- linux-2.5.31/kernel/sched.c	Sat Aug 10 21:41:24 2002
+++ linux/kernel/sched.c	Sat Aug 24 22:10:00 2002
@@ -630,6 +630,8 @@
 	return nr_running;
 }
 
+#include <linux/smp_balance.h>
+
 /*
  * find_busiest_queue - find the busiest runqueue.
  */
@@ -639,6 +641,12 @@
 	runqueue_t *busiest, *rq_src;
 
 	/*
+	 * Handle architecture-specific balancing, such as hyperthreading.
+	 */
+	if (arch_load_balance(this_cpu, idle))
+		return NULL;
+
+	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
 	 * we re-check the 'best' source CPU later on again, with