From: Matthew Dobson

Patch 3/3:

This patch implements a generic version of the nr_cpus_node(node) macro
implemented for ppc64 by the previous patch.  The generic version simply
computes the hweight of the bitmask returned by the node_to_cpumask(node)
topology macro.

This patch also adds a generic_hweight64() function and an hweight_long()
function, which are used as helpers for the generic nr_cpus_node() macro.

This patch also adds a for_each_node_with_cpus() macro, which is used in
sched_best_cpu() in kernel/sched.c to fix the original problem of
scheduling processes on CPU-less nodes.  This macro should also be used in
the future to avoid similar problems.

Test-compiled and booted by Andrew Theurer (habanero@us.ibm.com) on both
x440 and ppc64.

 25-akpm/include/linux/bitops.h   |   20 +++++++++++++++++++-
 25-akpm/include/linux/topology.h |   19 +++++++++++++++++++
 25-akpm/kernel/sched.c           |    2 +-
 3 files changed, 39 insertions(+), 2 deletions(-)

diff -puN include/linux/bitops.h~sched_best_cpu-fix-03 include/linux/bitops.h
--- 25/include/linux/bitops.h~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/include/linux/bitops.h	Tue Jun  3 13:03:31 2003
@@ -1,5 +1,6 @@
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
+#include
 #include
 
 /*
@@ -107,7 +108,24 @@ static inline unsigned int generic_hweig
 	return (res & 0x0F) + ((res >> 4) & 0x0F);
 }
 
-#include
+static inline unsigned long generic_hweight64(u64 w)
+{
+	u64 res;
+	if (sizeof(unsigned long) == 4)
+		return generic_hweight32((unsigned int)(w >> 32)) +
+			generic_hweight32((unsigned int)w);
+
+	res = (w & 0x5555555555555555) + ((w >> 1) & 0x5555555555555555);
+	res = (res & 0x3333333333333333) + ((res >> 2) & 0x3333333333333333);
+	res = (res & 0x0F0F0F0F0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F0F0F0F0F);
+	res = (res & 0x00FF00FF00FF00FF) + ((res >> 8) & 0x00FF00FF00FF00FF);
+	res = (res & 0x0000FFFF0000FFFF) + ((res >> 16) & 0x0000FFFF0000FFFF);
+	return (res & 0x00000000FFFFFFFF) + ((res >> 32) & 0x00000000FFFFFFFF);
+}
+static inline unsigned long hweight_long(unsigned long w)
+{
+	return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
+}
 
 #endif

diff -puN include/linux/topology.h~sched_best_cpu-fix-03 include/linux/topology.h
--- 25/include/linux/topology.h~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/include/linux/topology.h	Tue Jun  3 13:03:31 2003
@@ -27,6 +27,25 @@
 #ifndef _LINUX_TOPOLOGY_H
 #define _LINUX_TOPOLOGY_H
 
+#include
+#include
+#include
+
 #include
 
+#ifndef nr_cpus_node
+#define nr_cpus_node(node) (hweight_long(node_to_cpumask(node)))
+#endif
+
+static inline int __next_node_with_cpus(int node)
+{
+	do
+		++node;
+	while (node < numnodes && !nr_cpus_node(node));
+	return node;
+}
+
+#define for_each_node_with_cpus(node) \
+	for (node = 0; node < numnodes; node = __next_node_with_cpus(node))
+
 #endif /* _LINUX_TOPOLOGY_H */

diff -puN kernel/sched.c~sched_best_cpu-fix-03 kernel/sched.c
--- 25/kernel/sched.c~sched_best_cpu-fix-03	Tue Jun  3 13:03:31 2003
+++ 25-akpm/kernel/sched.c	Tue Jun  3 13:03:31 2003
@@ -779,7 +779,7 @@ static int sched_best_cpu(struct task_st
 		return best_cpu;
 
 	minload = 10000000;
-	for (i = 0; i < numnodes; i++) {
+	for_each_node_with_cpus(i) {
 		load = atomic_read(&node_nr_running[i]);
 		if (load < minload) {
 			minload = load;
_
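
For reference, the mask-and-add sequence in generic_hweight64() is the
classic divide-and-conquer population count: each step treats the word as a
vector of equal-width bit-fields and adds adjacent fields pairwise, doubling
the field width from 1 bit up to 64, so the final sum of the two 32-bit
halves is the number of set bits.  The standalone userspace program below is
an illustration only, not part of the patch (sketch_hweight64() and
naive_hweight64() are names made up for this demo); it mirrors the same six
steps and checks them against a naive per-bit count:

/*
 * Userspace sketch (not part of the patch) of the generic_hweight64()
 * bit-twiddling.  Build: cc -std=c99 -o hweight hweight.c && ./hweight
 */
#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static unsigned long sketch_hweight64(uint64_t w)
{
	uint64_t res;

	/* 1-bit fields -> 2-bit sums: each 2-bit field holds 0, 1 or 2 */
	res = (w & 0x5555555555555555ULL) + ((w >> 1) & 0x5555555555555555ULL);
	/* 2-bit sums -> 4-bit sums */
	res = (res & 0x3333333333333333ULL) + ((res >> 2) & 0x3333333333333333ULL);
	/* 4-bit sums -> 8-bit sums: each byte now counts its own bits */
	res = (res & 0x0F0F0F0F0F0F0F0FULL) + ((res >> 4) & 0x0F0F0F0F0F0F0F0FULL);
	/* 8 -> 16 */
	res = (res & 0x00FF00FF00FF00FFULL) + ((res >> 8) & 0x00FF00FF00FF00FFULL);
	/* 16 -> 32 */
	res = (res & 0x0000FFFF0000FFFFULL) + ((res >> 16) & 0x0000FFFF0000FFFFULL);
	/* 32 -> 64: the total number of set bits, at most 64 */
	return (res & 0x00000000FFFFFFFFULL) + ((res >> 32) & 0x00000000FFFFFFFFULL);
}

/* naive one-bit-at-a-time reference count */
static unsigned long naive_hweight64(uint64_t w)
{
	unsigned long n = 0;

	for (; w; w >>= 1)
		n += w & 1;
	return n;
}

int main(void)
{
	uint64_t samples[] = { 0, 1, 0xFFULL, 0x8000000000000001ULL,
			       0xAAAAAAAAAAAAAAAAULL, (uint64_t)-1 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		assert(sketch_hweight64(samples[i]) ==
		       naive_hweight64(samples[i]));
		printf("hweight64(0x%llx) = %lu\n",
		       (unsigned long long)samples[i],
		       sketch_hweight64(samples[i]));
	}
	return 0;
}

The ULL suffixes and <stdint.h> types are userspace conveniences; the patch
itself uses the kernel's u64 and falls back to two generic_hweight32()
calls when unsigned long is 32 bits wide.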
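
In the same spirit, here is a self-contained sketch of the new node
iterator.  It is again an illustration only: numnodes is hard-coded,
node_to_cpumask() is faked with a small table (node 1 is deliberately
CPU-less, modelling the memory-only nodes that broke sched_best_cpu()), and
hweight_long() is simplified to a naive loop.  The nr_cpus_node(),
__next_node_with_cpus() and for_each_node_with_cpus() definitions follow
the patch:

/*
 * Userspace sketch (not part of the patch) of for_each_node_with_cpus().
 * numnodes and node_to_cpumask() are made-up stand-ins for the kernel's
 * topology data.  Build: cc -std=c99 -o nodes nodes.c && ./nodes
 */
#include <stdio.h>

#define numnodes 4

/* hypothetical per-node CPU masks: node 1 has no CPUs */
static unsigned long node_to_cpumask_tbl[numnodes] = { 0x03, 0x00, 0x0c, 0x30 };
#define node_to_cpumask(node)	(node_to_cpumask_tbl[node])

/* naive stand-in for the hweight_long() added by the patch */
static unsigned long hweight_long(unsigned long w)
{
	unsigned long n = 0;

	for (; w; w >>= 1)
		n += w & 1;
	return n;
}

/* the three definitions below follow the patch */
#define nr_cpus_node(node)	(hweight_long(node_to_cpumask(node)))

static inline int __next_node_with_cpus(int node)
{
	do
		++node;
	while (node < numnodes && !nr_cpus_node(node));
	return node;
}

#define for_each_node_with_cpus(node) \
	for (node = 0; node < numnodes; node = __next_node_with_cpus(node))

int main(void)
{
	int i;

	/* prints nodes 0, 2 and 3; CPU-less node 1 is skipped, which is
	 * exactly what keeps sched_best_cpu() off such nodes */
	for_each_node_with_cpus(i)
		printf("node %d: %lu cpus\n", i, nr_cpus_node(i));
	return 0;
}

One property worth noting: the iterator always visits node 0 and only
filters the nodes after it, so it assumes the first node has CPUs.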