diff -urN numa-sched-ref/arch/alpha/config.in numa-sched/arch/alpha/config.in
--- numa-sched-ref/arch/alpha/config.in	Thu Sep 20 06:48:30 2001
+++ numa-sched/arch/alpha/config.in	Thu Sep 20 06:48:44 2001
@@ -215,6 +215,9 @@
    bool 'Discontiguous Memory Support' CONFIG_DISCONTIGMEM
    if [ "$CONFIG_DISCONTIGMEM" = "y" ]; then
       bool ' NUMA Support' CONFIG_NUMA
+      if [ "$CONFIG_NUMA" = "y" ]; then	
+	bool '  NUMA Scheduler Support' CONFIG_NUMA_SCHED
+      fi
    fi
 fi
 
diff -urN numa-sched-ref/arch/alpha/kernel/entry.S numa-sched/arch/alpha/kernel/entry.S
--- numa-sched-ref/arch/alpha/kernel/entry.S	Thu Sep 20 06:48:29 2001
+++ numa-sched/arch/alpha/kernel/entry.S	Thu Sep 20 06:48:44 2001
@@ -35,7 +35,7 @@
 #define TASK_EXEC_DOMAIN	32
 #define TASK_NEED_RESCHED	40
 #define TASK_PTRACE		48
-#define TASK_PROCESSOR		100
+#define TASK_PROCESSOR		84
 
 /*
  * task flags (must match include/linux/sched.h):
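
The TASK_PROCESSOR offset in entry.S drops from 100 to 84 because the include/linux/sched.h hunk further down shrinks counter, nice and policy from long to int, which (together with the padding this removes on a 64-bit ABI such as Alpha) moves every later field of struct task_struct, including processor; the hand-maintained assembly offsets have to follow. A toy standalone illustration of the effect follows; layout_before/layout_after are stand-ins for the relevant slice of task_struct, not kernel code, and the printed offsets are only meaningful on an LP64 target.

/*
 * Toy illustration, not kernel code: shows why shrinking the goodness()
 * fields moves the byte offset that entry.S hard-codes as TASK_PROCESSOR.
 */
#include <stdio.h>
#include <stddef.h>

struct layout_before {
	long counter;
	long nice;
	unsigned long policy;
	void *mm;
	int has_cpu, processor;
};

struct layout_after {
	int counter;
	int nice;
	unsigned int policy;
	void *mm;
	int has_cpu, processor;
};

int main(void)
{
	printf("processor offset before: %zu\n",
	       offsetof(struct layout_before, processor));
	printf("processor offset after:  %zu\n",
	       offsetof(struct layout_after, processor));
	return 0;
}
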
diff -urN numa-sched-ref/include/asm-alpha/mmzone.h numa-sched/include/asm-alpha/mmzone.h
--- numa-sched-ref/include/asm-alpha/mmzone.h	Sat May 26 04:03:47 2001
+++ numa-sched/include/asm-alpha/mmzone.h	Thu Sep 20 06:48:44 2001
@@ -21,7 +21,7 @@
 #ifdef NOTYET
 	kern_vars_t	kern_vars;
 #endif
-#if defined(CONFIG_NUMA) && defined(CONFIG_NUMA_SCHED)
+#ifdef CONFIG_NUMA_SCHED
 	struct numa_schedule_data schedule_data;
 #endif
 } plat_pg_data_t;
diff -urN numa-sched-ref/include/asm-alpha/timex.h numa-sched/include/asm-alpha/timex.h
--- numa-sched-ref/include/asm-alpha/timex.h	Tue Dec 29 22:56:15 1998
+++ numa-sched/include/asm-alpha/timex.h	Thu Sep 20 06:48:44 2001
@@ -27,4 +27,8 @@
 	return ret;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-arm/timex.h numa-sched/include/asm-arm/timex.h
--- numa-sched-ref/include/asm-arm/timex.h	Thu Nov 16 15:37:33 2000
+++ numa-sched/include/asm-arm/timex.h	Thu Sep 20 06:48:44 2001
@@ -23,4 +23,8 @@
 	return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-cris/timex.h numa-sched/include/asm-cris/timex.h
--- numa-sched-ref/include/asm-cris/timex.h	Sat May 26 04:03:47 2001
+++ numa-sched/include/asm-cris/timex.h	Thu Sep 20 06:48:44 2001
@@ -20,4 +20,8 @@
         return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-i386/timex.h numa-sched/include/asm-i386/timex.h
--- numa-sched-ref/include/asm-i386/timex.h	Thu Sep 20 06:25:50 2001
+++ numa-sched/include/asm-i386/timex.h	Thu Sep 20 06:48:44 2001
@@ -47,4 +47,8 @@
 
 extern unsigned long cpu_khz;
 
+typedef cycles_t last_schedule_t;
+#define get_last_schedule() ({ get_cycles(); })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-ia64/timex.h numa-sched/include/asm-ia64/timex.h
--- numa-sched-ref/include/asm-ia64/timex.h	Tue May  1 19:35:31 2001
+++ numa-sched/include/asm-ia64/timex.h	Thu Sep 20 06:48:44 2001
@@ -21,4 +21,8 @@
 	return ret;
 }
 
+typedef cycles_t last_schedule_t;
+#define get_last_schedule() ({ get_cycles(); })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif /* _ASM_IA64_TIMEX_H */
diff -urN numa-sched-ref/include/asm-m68k/timex.h numa-sched/include/asm-m68k/timex.h
--- numa-sched-ref/include/asm-m68k/timex.h	Tue Jan  5 20:20:43 1999
+++ numa-sched/include/asm-m68k/timex.h	Thu Sep 20 06:48:44 2001
@@ -19,4 +19,8 @@
 	return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-mips/timex.h numa-sched/include/asm-mips/timex.h
--- numa-sched-ref/include/asm-mips/timex.h	Sat May 13 17:31:25 2000
+++ numa-sched/include/asm-mips/timex.h	Thu Sep 20 06:48:44 2001
@@ -36,6 +36,11 @@
 {
 	return read_32bit_cp0_register(CP0_COUNT);
 }
+
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif /* __KERNEL__ */
 
 #endif /*  __ASM_MIPS_TIMEX_H */
diff -urN numa-sched-ref/include/asm-mips64/timex.h numa-sched/include/asm-mips64/timex.h
--- numa-sched-ref/include/asm-mips64/timex.h	Thu Sep 20 01:44:14 2001
+++ numa-sched/include/asm-mips64/timex.h	Thu Sep 20 06:48:44 2001
@@ -43,4 +43,8 @@
 	return val;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif /*  _ASM_TIMEX_H */
diff -urN numa-sched-ref/include/asm-parisc/timex.h numa-sched/include/asm-parisc/timex.h
--- numa-sched-ref/include/asm-parisc/timex.h	Thu Dec 14 22:34:13 2000
+++ numa-sched/include/asm-parisc/timex.h	Thu Sep 20 06:48:44 2001
@@ -18,4 +18,8 @@
 	return mfctl(16);
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-ppc/timex.h numa-sched/include/asm-ppc/timex.h
--- numa-sched-ref/include/asm-ppc/timex.h	Thu Sep 20 01:44:15 2001
+++ numa-sched/include/asm-ppc/timex.h	Thu Sep 20 06:48:44 2001
@@ -45,5 +45,9 @@
 	return ret;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
 #endif /* __KERNEL__ */
diff -urN numa-sched-ref/include/asm-s390/timex.h numa-sched/include/asm-s390/timex.h
--- numa-sched-ref/include/asm-s390/timex.h	Fri May 12 20:41:44 2000
+++ numa-sched/include/asm-s390/timex.h	Thu Sep 20 06:48:44 2001
@@ -26,4 +26,8 @@
         return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-s390x/timex.h numa-sched/include/asm-s390x/timex.h
--- numa-sched-ref/include/asm-s390x/timex.h	Thu Feb 22 03:45:11 2001
+++ numa-sched/include/asm-s390x/timex.h	Thu Sep 20 06:48:44 2001
@@ -26,4 +26,8 @@
         return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-sh/timex.h numa-sched/include/asm-sh/timex.h
--- numa-sched-ref/include/asm-sh/timex.h	Fri Jan  5 02:19:29 2001
+++ numa-sched/include/asm-sh/timex.h	Thu Sep 20 06:48:44 2001
@@ -21,4 +21,8 @@
 	return 0;
 }
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif /* __ASM_SH_TIMEX_H */
diff -urN numa-sched-ref/include/asm-sparc/timex.h numa-sched/include/asm-sparc/timex.h
--- numa-sched-ref/include/asm-sparc/timex.h	Thu Mar 11 01:53:37 1999
+++ numa-sched/include/asm-sparc/timex.h	Thu Sep 20 06:48:44 2001
@@ -17,4 +17,8 @@
 extern cycles_t cacheflush_time;
 #define get_cycles()	(0)
 
+typedef long last_schedule_t;
+#define get_last_schedule() ({ jiffies; })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
diff -urN numa-sched-ref/include/asm-sparc64/timex.h numa-sched/include/asm-sparc64/timex.h
--- numa-sched-ref/include/asm-sparc64/timex.h	Thu Mar 11 01:53:38 1999
+++ numa-sched/include/asm-sparc64/timex.h	Thu Sep 20 06:48:44 2001
@@ -21,4 +21,8 @@
 	ret; \
 })
 
+typedef cycles_t last_schedule_t;
+#define get_last_schedule() ({ get_cycles(); })
+#define last_schedule_before(a, b)	({ a < b; })
+
 #endif
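
Every <asm-*/timex.h> now exports the same three-piece abstraction: a last_schedule_t type, get_last_schedule() to stamp the moment a CPU last went through schedule(), and last_schedule_before() to order two stamps. Architectures with a cheap cycle counter (i386, ia64, sparc64) keep using get_cycles(); the others fall back to jiffies, which is coarser but good enough for the "pick the CPU that has been idle longest" heuristic in reschedule_idle(). (The comparison macro expands its arguments unparenthesized, so it is only safe with simple expressions, which is how the scheduler uses it.) Below is a minimal standalone sketch of how the triple is meant to be used; jiffies, NR_CPUS and the per-CPU array are mocked here, and GCC statement expressions are assumed.

/*
 * Standalone sketch of the last_schedule_t abstraction added to the
 * per-arch <asm/timex.h> headers.  Only the three definitions mirror the
 * patch; everything else is mocked for illustration.
 */
#include <stdio.h>

#define NR_CPUS 4
static long jiffies;			/* stand-in for the kernel's tick counter */

typedef long last_schedule_t;		/* as in the jiffies-based architectures */
#define get_last_schedule()		({ jiffies; })
#define last_schedule_before(a, b)	({ a < b; })

static last_schedule_t last_schedule[NR_CPUS];

/* Return the CPU that has been idle longest (the oldest timestamp). */
static int oldest_idle_cpu(void)
{
	int cpu, best = 0;

	for (cpu = 1; cpu < NR_CPUS; cpu++)
		if (last_schedule_before(last_schedule[cpu], last_schedule[best]))
			best = cpu;
	return best;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		jiffies += 10;
		last_schedule[cpu] = get_last_schedule();
	}
	printf("oldest idle cpu: %d\n", oldest_idle_cpu());	/* prints 0 */
	return 0;
}
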
diff -urN numa-sched-ref/include/linux/numa_sched.h numa-sched/include/linux/numa_sched.h
--- numa-sched-ref/include/linux/numa_sched.h	Thu Jan  1 01:00:00 1970
+++ numa-sched/include/linux/numa_sched.h	Thu Sep 20 06:48:44 2001
@@ -0,0 +1,53 @@
+/*
+ *  linux/include/linux/numa_sched.h
+ *
+ *  NUMA based scheduler
+ */
+
+#ifndef _LINUX_NUMA_SCHED_H
+#define _LINUX_NUMA_SCHED_H
+
+#ifdef CONFIG_NUMA_SCHED
+#include <linux/cache.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <asm/timex.h>
+
+struct numa_per_cpu_schedule_data {
+	struct task_struct * curr;
+	last_schedule_t last_schedule;
+};
+
+struct numa_schedule_data {
+	struct numa_per_cpu_schedule_data per_cpu[NR_CPUS] ____cacheline_aligned;
+	struct list_head runqueue_head;
+	int nr_running, nr_threads;
+};
+
+#define numa_nr_running_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running++; } while(0)
+#define numa_nr_running_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running--; } while(0)
+#define numa_nr_running(nid) (NODE_SCHEDULE_DATA(nid)->nr_running)
+
+#define numa_nr_threads_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads++; } while(0)
+#define numa_nr_threads_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads--; } while(0)
+#define numa_nr_threads(nid) (NODE_SCHEDULE_DATA(nid)->nr_threads)
+
+#define cpu_curr(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].curr)
+#define last_schedule(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].last_schedule)
+
+#define numa_runqueue_head(x) (&NODE_SCHEDULE_DATA(x)->runqueue_head)
+
+#else /* CONFIG_NUMA_SCHED */
+
+#define numa_nr_running_inc() do { } while(0)
+#define numa_nr_running_dec() do { } while(0)
+#define numa_nr_threads_inc() do { } while(0)
+#define numa_nr_threads_dec() do { } while(0)
+
+#define cpu_curr(cpu) (aligned_data[(cpu)].schedule_data.curr)
+#define last_schedule(cpu) (aligned_data[(cpu)].schedule_data.last_schedule)
+
+#define numa_runqueue_head(x) (&runqueue_head)
+#endif /* CONFIG_NUMA_SCHED */
+
+#endif /* _LINUX_NUMA_SCHED_H */
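
The new include/linux/numa_sched.h leans on three symbols the NUMA architecture code is expected to provide: NODE_SCHEDULE_DATA(nid), numa_node_id() and cputonode(cpu); on Alpha the struct numa_schedule_data instance lives inside plat_pg_data_t, per the mmzone.h hunk above. The following standalone sketch shows how the pieces hang together; the three symbols, NUMNODES and the two-CPUs-per-node mapping are trivial mocks for illustration only, not part of the patch.

/*
 * Standalone sketch of the per-node scheduler data introduced by
 * include/linux/numa_sched.h, with the arch-provided pieces mocked.
 */
#include <stdio.h>

#define NR_CPUS		4
#define NUMNODES	2

struct list_head { struct list_head *next, *prev; };
#define INIT_LIST_HEAD(h) do { (h)->next = (h)->prev = (h); } while (0)

struct numa_per_cpu_schedule_data {
	void *curr;			/* struct task_struct * in the patch */
	long last_schedule;
};

struct numa_schedule_data {
	struct numa_per_cpu_schedule_data per_cpu[NR_CPUS];
	struct list_head runqueue_head;
	int nr_running, nr_threads;
};

static struct numa_schedule_data node_data[NUMNODES];

#define NODE_SCHEDULE_DATA(nid)	(&node_data[(nid)])	/* mock */
#define cputonode(cpu)		((cpu) / 2)		/* mock: 2 CPUs per node */
#define numa_node_id()		cputonode(0)		/* mock: pretend we run on CPU 0 */

#define numa_runqueue_head(nid)	(&NODE_SCHEDULE_DATA(nid)->runqueue_head)
#define numa_nr_running_inc()	do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running++; } while (0)

int main(void)
{
	int nid;

	for (nid = 0; nid < NUMNODES; nid++)
		INIT_LIST_HEAD(numa_runqueue_head(nid));

	numa_nr_running_inc();		/* accounting lands on the local node */
	printf("node 0 nr_running = %d\n", NODE_SCHEDULE_DATA(0)->nr_running);
	return 0;
}
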
diff -urN numa-sched-ref/include/linux/sched.h numa-sched/include/linux/sched.h
--- numa-sched-ref/include/linux/sched.h	Thu Sep 20 06:48:30 2001
+++ numa-sched/include/linux/sched.h	Thu Sep 20 06:49:12 2001
@@ -26,6 +26,7 @@
 #include <linux/signal.h>
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
+#include <linux/numa_sched.h>
 
 struct exec_domain;
 
@@ -300,9 +301,9 @@
  * all fields in a single cacheline that are needed for
  * the goodness() loop in schedule().
  */
-	long counter;
-	long nice;
-	unsigned long policy;
+	int counter;
+	int nice;
+	unsigned int policy;
 	struct mm_struct *mm;
 	int has_cpu, processor;
 	unsigned long cpus_allowed;
@@ -311,8 +312,9 @@
 	 * that's just fine.)
 	 */
 	struct list_head run_list;
-	unsigned long sleep_time;
-
+#ifdef CONFIG_NUMA_SCHED
+	int nid;
+#endif
 	struct task_struct *next_task, *prev_task;
 	struct mm_struct *active_mm;
 	struct rw_sem_recursor mm_recursor;
@@ -462,7 +464,7 @@
     mm:			NULL,						\
     active_mm:		&init_mm,					\
     mm_recursor:	RWSEM_RECURSOR_INITIALIZER,			\
-    cpus_allowed:	-1,						\
+    cpus_allowed:	-1UL,						\
     run_list:		LIST_HEAD_INIT(tsk.run_list),			\
     next_task:		&tsk,						\
     prev_task:		&tsk,						\
@@ -767,6 +769,30 @@
 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
 extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
 
+#define nr_running_inc()			\
+do {						\
+	numa_nr_running_inc();			\
+	nr_running++;				\
+} while (0)
+
+#define nr_running_dec()			\
+do {						\
+	numa_nr_running_dec();			\
+	nr_running--;				\
+} while (0)
+
+#define nr_threads_inc()			\
+do {						\
+	numa_nr_threads_inc();			\
+	nr_threads++;				\
+} while (0)
+
+#define nr_threads_dec()			\
+do {						\
+	numa_nr_threads_dec();			\
+	nr_threads--;				\
+} while (0)
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	wait_queue_t __wait;						\
@@ -847,29 +873,28 @@
 #define next_thread(p) \
 	list_entry((p)->thread_group.next, struct task_struct, thread_group)
 
-static inline void del_from_runqueue(struct task_struct * p)
-{
-	nr_running--;
-	p->sleep_time = jiffies;
-	list_del(&p->run_list);
-	p->run_list.next = NULL;
-}
+#define del_from_runqueue(p)			\
+do {						\
+	nr_running_dec();			\
+	list_del(&(p)->run_list);		\
+	(p)->run_list.next = NULL;		\
+} while(0)
 
 static inline int task_on_runqueue(struct task_struct *p)
 {
 	return (p->run_list.next != NULL);
 }
 
-static inline void unhash_process(struct task_struct *p)
-{
-	if (task_on_runqueue(p)) BUG();
-	write_lock_irq(&tasklist_lock);
-	nr_threads--;
-	unhash_pid(p);
-	REMOVE_LINKS(p);
-	list_del(&p->thread_group);
-	write_unlock_irq(&tasklist_lock);
-}
+#define unhash_process(p)			\
+do {						\
+	if (task_on_runqueue(p)) BUG();		\
+	write_lock_irq(&tasklist_lock);		\
+	nr_threads_dec();			\
+	unhash_pid(p);				\
+	REMOVE_LINKS(p);			\
+	list_del(&(p)->thread_group);		\
+	write_unlock_irq(&tasklist_lock);	\
+} while(0)
 
 /* Protects ->fs, ->files, ->mm, and synchronises with wait4().  Nests inside tasklist_lock */
 static inline void task_lock(struct task_struct *p)
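
del_from_runqueue() and unhash_process() turn from inline functions into macros, presumably so the new nr_running_dec()/nr_threads_dec() wrappers (and, under CONFIG_NUMA_SCHED, NODE_SCHEDULE_DATA()) are expanded at the point of use rather than where sched.h is parsed; the wrappers exist to keep the global counter and the local node's counter moving in lockstep. The sleep_time bookkeeping that del_from_runqueue() used to do is dropped along with the field itself. A compilable sketch of the paired counters follows; everything in it is mocked except the shape of the macros, which mirrors the hunks above.

/*
 * Minimal sketch of the paired global/per-node counters maintained by
 * nr_running_inc()/nr_running_dec().  node_nr_running and current_node are
 * stand-ins for the per-node field and numa_node_id().
 */
#include <assert.h>

#define NUMNODES 2
static int nr_running;			/* global, as in the stock scheduler */
static int node_nr_running[NUMNODES];	/* stand-in for the per-node field */
static int current_node;		/* stand-in for numa_node_id() */

#define numa_nr_running_inc()	do { node_nr_running[current_node]++; } while (0)
#define numa_nr_running_dec()	do { node_nr_running[current_node]--; } while (0)

#define nr_running_inc()	do { numa_nr_running_inc(); nr_running++; } while (0)
#define nr_running_dec()	do { numa_nr_running_dec(); nr_running--; } while (0)

int main(void)
{
	current_node = 1;
	nr_running_inc();		/* wake-up accounted to node 1 */
	nr_running_inc();
	nr_running_dec();		/* one task goes back to sleep */

	assert(nr_running == 1);
	assert(node_nr_running[1] == 1 && node_nr_running[0] == 0);
	return 0;
}
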
diff -urN numa-sched-ref/kernel/fork.c numa-sched/kernel/fork.c
--- numa-sched-ref/kernel/fork.c	Thu Sep 20 01:44:19 2001
+++ numa-sched/kernel/fork.c	Thu Sep 20 06:48:44 2001
@@ -639,7 +639,6 @@
 	{
 		int i;
 		p->has_cpu = 0;
-		p->processor = current->processor;
 		/* ?? should we just memset this ?? */
 		for(i = 0; i < smp_num_cpus; i++)
 			p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
@@ -716,7 +715,7 @@
 
 	SET_LINKS(p);
 	hash_pid(p);
-	nr_threads++;
+	nr_threads_inc();
 	write_unlock_irq(&tasklist_lock);
 
 	if (p->ptrace & PT_PTRACED)
diff -urN numa-sched-ref/kernel/sched.c numa-sched/kernel/sched.c
--- numa-sched-ref/kernel/sched.c	Thu Sep 20 01:44:19 2001
+++ numa-sched/kernel/sched.c	Thu Sep 20 06:48:44 2001
@@ -10,6 +10,7 @@
  *  1998-11-19	Implemented schedule_timeout() and related stuff
  *		by Andrea Arcangeli
  *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
+ *  2001-01-29	first NUMA scheduler attempt by Andrea Arcangeli, SuSE
  */
 
 /*
@@ -91,6 +92,8 @@
 spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
 rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;	/* outer */
 
+#ifndef CONFIG_NUMA_SCHED
+
 static LIST_HEAD(runqueue_head);
 
 /*
@@ -100,13 +103,26 @@
 static union {
 	struct schedule_data {
 		struct task_struct * curr;
-		cycles_t last_schedule;
+		last_schedule_t last_schedule;
 	} schedule_data;
 	char __pad [SMP_CACHE_BYTES];
 } aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
 
-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
+#define init_numa_schedule_data() do { } while(0)
+
+#else /* CONFIG_NUMA_SCHED */
+
+static void __init init_numa_schedule_data(void)
+{
+	int i;
+
+	for (i = 0; i < numnodes; i++) {
+		INIT_LIST_HEAD(&NODE_SCHEDULE_DATA(i)->runqueue_head);
+		NODE_SCHEDULE_DATA(i)->nr_running = 0;
+		NODE_SCHEDULE_DATA(i)->nr_threads = 0;
+	}
+}
+#endif /* CONFIG_NUMA_SCHED */
 
 struct kernel_stat kstat;
 extern struct task_struct *child_reaper;
@@ -114,8 +130,9 @@
 #ifdef CONFIG_SMP
 
 #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
+#define logical_idle_task(cpu) (init_tasks[cpu])
 #define can_schedule(p,cpu) ((!(p)->has_cpu) && \
-				((p)->cpus_allowed & (1 << cpu)))
+				((p)->cpus_allowed & (1UL << cpu)))
 
 #else
 
@@ -213,8 +230,8 @@
 #ifdef CONFIG_SMP
 	int this_cpu = smp_processor_id();
 	struct task_struct *tsk, *target_tsk;
-	int cpu, best_cpu, i, max_prio;
-	cycles_t oldest_idle;
+	int cpu, best_cpu, i, max_prio, found_idle;
+	last_schedule_t oldest_idle;
 
 	/*
 	 * shortcut if the woken up task's last CPU is
@@ -222,17 +239,17 @@
 	 */
 	best_cpu = p->processor;
 	if (can_schedule(p, best_cpu)) {
-		tsk = idle_task(best_cpu);
-		if (cpu_curr(best_cpu) == tsk) {
-			int need_resched;
+		target_tsk = idle_task(best_cpu);
+		if (cpu_curr(best_cpu) == target_tsk) {
+			long need_resched;
 send_now_idle:
 			/*
 			 * If need_resched == -1 then we can skip sending
 			 * the IPI altogether, tsk->need_resched is
 			 * actively watched by the idle thread.
 			 */
-			need_resched = tsk->need_resched;
-			tsk->need_resched = 1;
+			need_resched = target_tsk->need_resched;
+			target_tsk->need_resched = 1;
 			if ((best_cpu != this_cpu) && !need_resched)
 				smp_send_reschedule(best_cpu);
 			return;
@@ -246,13 +263,17 @@
 	 * one will have the least active cache context.) Also find
 	 * the executing process which has the least priority.
 	 */
-	oldest_idle = (cycles_t) -1;
 	target_tsk = NULL;
 	max_prio = 0;
+	found_idle = 0;
 
 	for (i = 0; i < smp_num_cpus; i++) {
 		cpu = cpu_logical_map(i);
-		if (!can_schedule(p, cpu))
+		if (
+#ifdef CONFIG_NUMA_SCHED
+		    cputonode(cpu) != p->nid ||
+#endif
+		    !can_schedule(p, cpu))
 			continue;
 		tsk = cpu_curr(cpu);
 		/*
@@ -260,13 +281,14 @@
 		 * a priority list between idle CPUs, but this is not
 		 * a problem.
 		 */
-		if (tsk == idle_task(cpu)) {
-			if (last_schedule(cpu) < oldest_idle) {
+		if (tsk == logical_idle_task(i)) {
+			if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) {
 				oldest_idle = last_schedule(cpu);
 				target_tsk = tsk;
+				found_idle = 1;
 			}
 		} else {
-			if (oldest_idle == -1ULL) {
+			if (!found_idle) {
 				int prio = preemption_goodness(tsk, p, cpu);
 
 				if (prio > max_prio) {
@@ -276,15 +298,33 @@
 			}
 		}
 	}
-	tsk = target_tsk;
-	if (tsk) {
-		if (oldest_idle != -1ULL) {
-			best_cpu = tsk->processor;
-			goto send_now_idle;
+
+#ifdef CONFIG_NUMA_SCHED
+	if (!target_tsk)
+		/* Make sure to use the idle cpus in the other nodes */
+		for (i = 0; i < smp_num_cpus; i++) {
+			cpu = cpu_logical_map(i);
+			if (cputonode(cpu) == p->nid || !can_schedule(p, cpu))
+				continue;
+			tsk = cpu_curr(cpu);
+			if (tsk == logical_idle_task(i)) {
+				if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) {
+					oldest_idle = last_schedule(cpu);
+					target_tsk = tsk;
+					found_idle = 1;
+				p->nid = cputonode(cpu);
+				}
+			}
 		}
-		tsk->need_resched = 1;
-		if (tsk->processor != this_cpu)
-			smp_send_reschedule(tsk->processor);
+#endif
+
+	if (target_tsk) {
+		best_cpu = target_tsk->processor;
+		if (found_idle)
+			goto send_now_idle;
+		target_tsk->need_resched = 1;
+		if (best_cpu != this_cpu)
+			smp_send_reschedule(best_cpu);
 	}
 	return;
 		
@@ -308,20 +348,20 @@
  */
 static inline void add_to_runqueue(struct task_struct * p)
 {
-	list_add(&p->run_list, &runqueue_head);
-	nr_running++;
+	list_add(&p->run_list, numa_runqueue_head(p->nid));
+	nr_running_inc();
 }
 
 static inline void move_last_runqueue(struct task_struct * p)
 {
 	list_del(&p->run_list);
-	list_add_tail(&p->run_list, &runqueue_head);
+	list_add_tail(&p->run_list, numa_runqueue_head(p->nid));
 }
 
 static inline void move_first_runqueue(struct task_struct * p)
 {
 	list_del(&p->run_list);
-	list_add(&p->run_list, &runqueue_head);
+	list_add(&p->run_list, numa_runqueue_head(p->nid));
 }
 
 /*
@@ -344,9 +384,9 @@
 	p->state = TASK_RUNNING;
 	if (task_on_runqueue(p))
 		goto out;
-	add_to_runqueue(p);
 	if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
 		reschedule_idle(p);
+	add_to_runqueue(p);
 	success = 1;
 out:
 	spin_unlock_irqrestore(&runqueue_lock, flags);
@@ -532,10 +572,12 @@
  */
 asmlinkage void schedule(void)
 {
-	struct schedule_data * sched_data;
 	struct task_struct *prev, *next, *p;
 	struct list_head *tmp;
 	int this_cpu, c;
+#ifdef CONFIG_NUMA_SCHED
+	int recalculate_all;
+#endif
 
 
 	spin_lock_prefetch(&runqueue_lock);
@@ -550,12 +592,6 @@
 
 	release_kernel_lock(prev, this_cpu);
 
-	/*
-	 * 'sched_data' is protected by the fact that we can run
-	 * only one process per CPU.
-	 */
-	sched_data = & aligned_data[this_cpu].schedule_data;
-
 	spin_lock_irq(&runqueue_lock);
 
 	/* move an exhausted RR process to be last.. */
@@ -589,7 +625,7 @@
 		goto still_running;
 
 still_running_back:
-	list_for_each(tmp, &runqueue_head) {
+	list_for_each(tmp, numa_runqueue_head(numa_node_id())) {
 		p = list_entry(tmp, struct task_struct, run_list);
 		if (can_schedule(p, this_cpu)) {
 			int weight = goodness(p, this_cpu, prev->active_mm);
@@ -598,6 +634,27 @@
 		}
 	}
 
+#ifdef CONFIG_NUMA_SCHED
+	recalculate_all = 0;
+	if (c < 0) {
+		int nid;
+
+		recalculate_all = 1;
+		for (nid = 0; nid < numnodes; nid++) {
+			if (nid == numa_node_id())
+				continue;
+			list_for_each(tmp, numa_runqueue_head(nid)) {
+				p = list_entry(tmp, struct task_struct, run_list);
+				if (can_schedule(p, this_cpu)) {
+					int weight = goodness(p, this_cpu, prev->active_mm);
+					if (weight > c)
+						c = weight, next = p;
+				}
+			}
+		}
+	}
+#endif
+
 	/* Do we need to re-calculate counters? */
 	if (!c)
 		goto recalculate;
@@ -606,10 +663,16 @@
 	 * switching to the next task, save this fact in
 	 * sched_data.
 	 */
-	sched_data->curr = next;
+	cpu_curr(this_cpu) = next;
 #ifdef CONFIG_SMP
  	next->has_cpu = 1;
 	next->processor = this_cpu;
+#ifdef CONFIG_NUMA_SCHED
+	if (next != idle_task(this_cpu) && next->nid != numa_node_id()) {
+		next->nid = numa_node_id();
+		move_last_runqueue(next);
+	}
+#endif
 #endif
 	spin_unlock_irq(&runqueue_lock);
 
@@ -627,7 +690,7 @@
 	 * and it's approximate, so we do not have to maintain
 	 * it while holding the runqueue spinlock.
  	 */
- 	sched_data->last_schedule = get_cycles();
+ 	last_schedule(this_cpu) = get_last_schedule();
 
 	/*
 	 * We drop the scheduler lock early (it's a global spinlock),
@@ -686,8 +749,13 @@
 		struct task_struct *p;
 		spin_unlock_irq(&runqueue_lock);
 		read_lock(&tasklist_lock);
-		for_each_task(p)
+		for_each_task(p) {
+#ifdef CONFIG_NUMA_SCHED
+			if (!recalculate_all && p->nid != numa_node_id())
+				continue;
+#endif
 			p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
+		}
 		read_unlock(&tasklist_lock);
 		spin_lock_irq(&runqueue_lock);
 	}
@@ -1055,9 +1123,7 @@
 	int i;
 
 	// Subtract non-idle processes running on other CPUs.
-	for (i = 0; i < smp_num_cpus; i++) {
-		int cpu = cpu_logical_map(i);
-		if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
+	for (i = 0; i < smp_num_cpus; i++)
+		if (cpu_curr(cpu_logical_map(i)) != logical_idle_task(i))
 			nr_pending--;
-	}
 #else
@@ -1311,16 +1378,15 @@
 
 void __init init_idle(void)
 {
-	struct schedule_data * sched_data;
-	sched_data = &aligned_data[smp_processor_id()].schedule_data;
+	int cpu = smp_processor_id();
 
 	if (current != &init_task && task_on_runqueue(current)) {
 		printk("UGH! (%d:%d) was on the runqueue, removing.\n",
 			smp_processor_id(), current->pid);
 		del_from_runqueue(current);
 	}
-	sched_data->curr = current;
-	sched_data->last_schedule = get_cycles();
+	cpu_curr(cpu) = current;
+	last_schedule(cpu) = get_last_schedule();
 }
 
 extern void init_timervecs (void);
@@ -1350,4 +1416,6 @@
 	 */
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current, cpu);
+
+	init_numa_schedule_data();
 }
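
Taken together, the sched.c changes implement a node-local-first policy: reschedule_idle() first looks for an idle CPU on the woken task's own node (preferring the one idle longest), then falls back to idle CPUs on other nodes, at which point the task's nid follows the chosen CPU so that add_to_runqueue() (now called after reschedule_idle() in try_to_wake_up()) queues it on that node's runqueue; schedule() symmetrically scans only the local node's runqueue and walks the other nodes' queues only when nothing local is runnable, and the recalculate pass likewise touches only local tasks unless the cross-node scan was needed. Below is a compilable sketch of just that CPU-selection order; the data structures are mocked, and both the oldest-idle tie-break and the preemption-goodness fallback of the real code are omitted.

/*
 * Sketch of the selection order reschedule_idle() follows after this patch.
 * cputonode(), the idle map and pick_cpu() are illustrative stand-ins.
 */
#include <stdio.h>

#define NR_CPUS 4
#define cputonode(cpu)	((cpu) / 2)		/* mock: 2 CPUs per node */

struct task { int nid; };
static int cpu_is_idle[NR_CPUS] = { 0, 0, 0, 1 };	/* only CPU 3 is idle */

static int pick_cpu(struct task *p)
{
	int cpu;

	/* Pass 1: idle CPUs on the task's home node. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cputonode(cpu) == p->nid && cpu_is_idle[cpu])
			return cpu;

	/* Pass 2: idle CPUs anywhere else; the task follows the CPU's node. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cputonode(cpu) != p->nid && cpu_is_idle[cpu]) {
			p->nid = cputonode(cpu);
			return cpu;
		}

	return -1;	/* no idle CPU: the real code considers preemption here */
}

int main(void)
{
	struct task t = { .nid = 0 };
	int cpu = pick_cpu(&t);

	printf("picked cpu %d, task now on node %d\n", cpu, t.nid);
	return 0;
}
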