From: Matthew Dobson

Patch 3/3:

This patch implements a generic version of the nr_cpus_node(node) macro
implemented for ppc64 by the previous patch.  The generic version simply
computes the hweight of the bitmask returned by the node_to_cpumask(node)
topology macro.

This patch also adds a generic_hweight64() function and an hweight_long()
function, which are used as helpers for the generic nr_cpus_node() macro.

This patch also adds a for_each_node_with_cpus() macro, which is used in
sched_best_cpu() in kernel/sched.c to fix the original problem of
scheduling processes on CPU-less nodes.  This macro should also be used in
the future to avoid similar problems.

Test-compiled and booted by Andrew Theurer (habanero@us.ibm.com) on both
x440 and ppc64.

 25-akpm/include/linux/bitops.h   |   20 +++++++++++++++++++-
 25-akpm/include/linux/topology.h |   19 +++++++++++++++++++
 25-akpm/kernel/sched.c           |    2 +-
 3 files changed, 39 insertions(+), 2 deletions(-)

diff -puN include/linux/bitops.h~sched_best_cpu-fix-03 include/linux/bitops.h
--- 25/include/linux/bitops.h~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/include/linux/bitops.h	Tue Jun  3 13:03:31 2003
@@ -1,5 +1,6 @@
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
+#include
 #include
 
 /*
@@ -107,7 +108,24 @@ static inline unsigned int generic_hweig
 	return (res & 0x0F) + ((res >> 4) & 0x0F);
 }
 
-#include
+static inline unsigned long generic_hweight64(u64 w)
+{
+	u64 res;
+	if (sizeof(unsigned long) == 4)
+		return generic_hweight32((unsigned int)(w >> 32)) +
+			generic_hweight32((unsigned int)w);
+
+	res = (w & 0x5555555555555555) + ((w >> 1) & 0x5555555555555555);
+	res = (res & 0x3333333333333333) + ((res >> 2) & 0x3333333333333333);
+	res = (res & 0x0F0F0F0F0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F0F0F0F0F);
+	res = (res & 0x00FF00FF00FF00FF) + ((res >> 8) & 0x00FF00FF00FF00FF);
+	res = (res & 0x0000FFFF0000FFFF) + ((res >> 16) & 0x0000FFFF0000FFFF);
+	return (res & 0x00000000FFFFFFFF) + ((res >> 32) & 0x00000000FFFFFFFF);
+}
+static inline unsigned long hweight_long(unsigned long w)
+{
+	return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
+}
 
 #endif

diff -puN include/linux/topology.h~sched_best_cpu-fix-03 include/linux/topology.h
--- 25/include/linux/topology.h~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/include/linux/topology.h	Tue Jun  3 13:03:31 2003
@@ -27,6 +27,25 @@
 #ifndef _LINUX_TOPOLOGY_H
 #define _LINUX_TOPOLOGY_H
 
+#include
+#include
+#include
+
 #include
 
+#ifndef nr_cpus_node
+#define nr_cpus_node(node) (hweight_long(node_to_cpumask(node)))
+#endif
+
+static inline int __next_node_with_cpus(int node)
+{
+	do
+		++node;
+	while (node < numnodes && !nr_cpus_node(node));
+	return node;
+}
+
+#define for_each_node_with_cpus(node) \
+	for (node = 0; node < numnodes; node = __next_node_with_cpus(node))
+
 #endif /* _LINUX_TOPOLOGY_H */

diff -puN kernel/sched.c~sched_best_cpu-fix-03 kernel/sched.c
--- 25/kernel/sched.c~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/kernel/sched.c	Tue Jun  3 13:03:31 2003
@@ -779,7 +779,7 @@ static int sched_best_cpu(struct task_st
 		return best_cpu;
 
 	minload = 10000000;
-	for (i = 0; i < numnodes; i++) {
+	for_each_node_with_cpus(i) {
 		load = atomic_read(&node_nr_running[i]);
 		if (load < minload) {
 			minload = load;
_
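
For reference, the mask-and-add sequence in generic_hweight64() is the
classic divide-and-conquer population count: each step treats the word as a
vector of equal-width bit-fields and adds adjacent fields pairwise, doubling
the field width from 1 bit up to 64, so the final sum of the two 32-bit
halves is the number of set bits.  The standalone userspace program below is
an illustration only, not part of the patch (sketch_hweight64() and
naive_hweight64() are names made up for this demo); it mirrors the same six
steps and checks them against a naive per-bit count:

/*
 * Userspace sketch (not part of the patch) of the generic_hweight64()
 * bit-twiddling.  Build: cc -std=c99 -o hweight hweight.c && ./hweight
 */
#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static unsigned long sketch_hweight64(uint64_t w)
{
	uint64_t res;

	/* 1-bit fields -> 2-bit sums: each 2-bit field holds 0, 1 or 2 */
	res = (w & 0x5555555555555555ULL) + ((w >> 1) & 0x5555555555555555ULL);
	/* 2-bit sums -> 4-bit sums */
	res = (res & 0x3333333333333333ULL) + ((res >> 2) & 0x3333333333333333ULL);
	/* 4-bit sums -> 8-bit sums: each byte now counts its own bits */
	res = (res & 0x0F0F0F0F0F0F0F0FULL) + ((res >> 4) & 0x0F0F0F0F0F0F0F0FULL);
	/* 8 -> 16 */
	res = (res & 0x00FF00FF00FF00FFULL) + ((res >> 8) & 0x00FF00FF00FF00FFULL);
	/* 16 -> 32 */
	res = (res & 0x0000FFFF0000FFFFULL) + ((res >> 16) & 0x0000FFFF0000FFFFULL);
	/* 32 -> 64: the total number of set bits, at most 64 */
	return (res & 0x00000000FFFFFFFFULL) + ((res >> 32) & 0x00000000FFFFFFFFULL);
}

/* naive one-bit-at-a-time reference count */
static unsigned long naive_hweight64(uint64_t w)
{
	unsigned long n = 0;

	for (; w; w >>= 1)
		n += w & 1;
	return n;
}

int main(void)
{
	uint64_t samples[] = { 0, 1, 0xFFULL, 0x8000000000000001ULL,
			       0xAAAAAAAAAAAAAAAAULL, (uint64_t)-1 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		assert(sketch_hweight64(samples[i]) ==
		       naive_hweight64(samples[i]));
		printf("hweight64(0x%llx) = %lu\n",
		       (unsigned long long)samples[i],
		       sketch_hweight64(samples[i]));
	}
	return 0;
}

The ULL suffixes and <stdint.h> types are userspace conveniences; the patch
itself uses the kernel's u64 and falls back to two generic_hweight32()
calls when unsigned long is 32 bits wide.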
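
In the same spirit, here is a self-contained sketch of the new node
iterator.  It is again an illustration only: numnodes is hard-coded,
node_to_cpumask() is faked with a small table (node 1 is deliberately
CPU-less, modelling the memory-only nodes that broke sched_best_cpu()), and
hweight_long() is simplified to a naive loop.  The nr_cpus_node(),
__next_node_with_cpus() and for_each_node_with_cpus() definitions follow
the patch:

/*
 * Userspace sketch (not part of the patch) of for_each_node_with_cpus().
 * numnodes and node_to_cpumask() are made-up stand-ins for the kernel's
 * topology data.  Build: cc -std=c99 -o nodes nodes.c && ./nodes
 */
#include <stdio.h>

#define numnodes 4

/* hypothetical per-node CPU masks: node 1 has no CPUs */
static unsigned long node_to_cpumask_tbl[numnodes] = { 0x03, 0x00, 0x0c, 0x30 };
#define node_to_cpumask(node)	(node_to_cpumask_tbl[node])

/* naive stand-in for the hweight_long() added by the patch */
static unsigned long hweight_long(unsigned long w)
{
	unsigned long n = 0;

	for (; w; w >>= 1)
		n += w & 1;
	return n;
}

/* the three definitions below follow the patch */
#define nr_cpus_node(node)	(hweight_long(node_to_cpumask(node)))

static inline int __next_node_with_cpus(int node)
{
	do
		++node;
	while (node < numnodes && !nr_cpus_node(node));
	return node;
}

#define for_each_node_with_cpus(node) \
	for (node = 0; node < numnodes; node = __next_node_with_cpus(node))

int main(void)
{
	int i;

	/* prints nodes 0, 2 and 3; CPU-less node 1 is skipped, which is
	 * exactly what keeps sched_best_cpu() off such nodes */
	for_each_node_with_cpus(i)
		printf("node %d: %lu cpus\n", i, nr_cpus_node(i));
	return 0;
}

One property worth noting: the iterator always visits node 0 and only
filters the nodes after it, so it assumes the first node has CPUs.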