--- 2.4.19pre9aa1/arch/alpha/kernel/process.c.~1~	Tue Jan 22 18:54:09 2002
+++ 2.4.19pre9aa1/arch/alpha/kernel/process.c	Thu May 30 00:26:18 2002
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -74,9 +75,6 @@ void
 cpu_idle(void)
 {
 	/* An endless idle loop with no priority at all. */
-	current->nice = 20;
-	current->counter = -100;
-
 	while (1) {
 		/* FIXME -- EV6 and LCA45 know how to power down
 		   the CPU.  */
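The interesting change is in smp.c below: smp_tune_scheduling() stops pinning cacheflush_time at INT_MAX and instead derives it (the cycles needed to stream the on-chip caches out to memory) from the cycle frequency, the cache size and a hardcoded 350 MB/s flush bandwidth, and exports the same interval in scheduler ticks as cache_decay_ticks for the O(1) scheduler's migration logic. Here is a standalone sanity check of that arithmetic; the 500 MHz EV6 figure and HZ=1024 are assumptions for the example, not measurements:

#include <stdio.h>

#define HZ 1024				/* alpha's tick rate */

int main(void)
{
	unsigned long freq = 500000000;		/* assumed: 500 MHz EV6 */
	unsigned long on_chip_cache = 64 + 64;	/* kB, 64k I + 64k D */
	unsigned long bandwidth = 350;		/* MB/s, same guess as the patch */
	unsigned long cacheflush_time, cache_decay_ticks;

	/* (cycles/usec) * bytes / (bytes/usec) = cycles */
	cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
	cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;

	printf("per-CPU timeslice cutoff: %lu.%02lu usecs.\n",
	       cacheflush_time / (freq / 1000000),
	       (cacheflush_time * 100 / (freq / 1000000)) % 100);
	printf("task migration cache decay timeout: %lu msecs.\n",
	       (cache_decay_ticks + 1) * 1000 / HZ);
	return 0;
}

With these numbers the flush works out to 187245 cycles, about 374.49 usecs, which is well inside one tick at 1024 Hz, so cache_decay_ticks truncates to zero: a migrated task's cache footprint is treated as decayed after less than one tick.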
--- 2.4.19pre9aa1/arch/alpha/kernel/smp.c.~1~	Wed May 29 22:54:04 2002
+++ 2.4.19pre9aa1/arch/alpha/kernel/smp.c	Thu May 30 01:20:44 2002
@@ -82,6 +82,7 @@ int smp_num_probed;		/* Internal process
 int smp_num_cpus = 1;		/* Number that came online.  */
 int smp_threads_ready;		/* True once the per process idle is forked. */
 cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
 
 int __cpu_number_map[NR_CPUS];
 int __cpu_logical_map[NR_CPUS];
@@ -157,11 +158,6 @@ smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
 
-	if (current != init_tasks[cpu_number_map(cpuid)]) {
-		printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
-		       cpuid, current, init_tasks[cpu_number_map(cpuid)]);
-	}
-
 	DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
 
 	/* Turn on machine checks.  */
@@ -216,9 +212,6 @@ smp_callin(void)
 	DBGS(("smp_callin: commencing CPU %d current %p\n",
 	      cpuid, current));
 
-	/* Setup the scheduler for this processor. */
-	init_idle();
-
 	/* ??? This should be in init_idle.  */
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
@@ -237,8 +230,9 @@ static void __init
 smp_tune_scheduling (int cpuid)
 {
 	struct percpu_struct *cpu;
-	unsigned long on_chip_cache;
-	unsigned long freq;
+	unsigned long on_chip_cache;	/* kB */
+	unsigned long freq;		/* Hz */
+	unsigned long bandwidth = 350;	/* MB/s */
 
 	cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
 				      + cpuid * hwrpb->processor_size);
@@ -259,29 +253,21 @@ smp_tune_scheduling (int cpuid)
 
 	case EV6_CPU:
 	case EV67_CPU:
-		on_chip_cache = 64 + 64;
-		break;
 
-	default:
-		on_chip_cache = 8 + 8;
+		on_chip_cache = 64 + 64;
 		break;
 	}
 
 	freq = hwrpb->cycle_freq ? : est_cycle_freq;
 
-#if 0
-	/* Magic estimation stolen from x86 port.  */
-	cacheflush_time = freq / 1024L * on_chip_cache / 5000L;
-
-	printk("Using heuristic of %d cycles.\n",
-	       cacheflush_time);
-#else
-	/* Magic value to force potential preemption of other CPUs.  */
-	cacheflush_time = INT_MAX;
+	cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
+	cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
 
-	printk("Using heuristic of %d cycles.\n",
-	       cacheflush_time);
-#endif
+	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+	       cacheflush_time/(freq/1000000),
+	       (cacheflush_time*100/(freq/1000000)) % 100);
+	printk("task migration cache decay timeout: %ld msecs.\n",
+	       (cache_decay_ticks + 1) * 1000 / HZ);
 }
 
 /*
@@ -506,14 +492,11 @@ smp_boot_one_cpu(int cpuid, int cpunum)
 	if (idle == &init_task)
 		panic("idle process is init_task for CPU %d", cpuid);
 
-	idle->processor = cpuid;
-	idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
+	init_idle(idle, cpuid);
+	unhash_process(idle);
+
 	__cpu_logical_map[cpunum] = cpuid;
 	__cpu_number_map[cpuid] = cpunum;
-
-	del_from_runqueue(idle);
-	unhash_process(idle);
-	init_tasks[cpunum] = idle;
 
 	DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
 	      cpuid, idle->state, idle->flags));
@@ -620,14 +603,11 @@ smp_boot_cpus(void)
 
 	__cpu_number_map[boot_cpuid] = 0;
 	__cpu_logical_map[0] = boot_cpuid;
-	current->processor = boot_cpuid;
 
 	smp_store_cpu_info(boot_cpuid);
 	smp_tune_scheduling(boot_cpuid);
 	smp_setup_percpu_timer(boot_cpuid);
 
-	init_idle();
-
 	/* ??? This should be in init_idle.  */
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
--- 2.4.19pre9aa1/include/asm-alpha/smp.h.~1~	Wed May 29 23:59:10 2002
+++ 2.4.19pre9aa1/include/asm-alpha/smp.h	Thu May 30 00:00:34 2002
@@ -56,7 +56,7 @@ extern int __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)	__cpu_logical_map[cpu]
 
 #define hard_smp_processor_id()	__hard_smp_processor_id()
-#define smp_processor_id()	(current->processor)
+#define smp_processor_id()	(current->cpu)
 
 extern unsigned long cpu_present_mask;
 #define cpu_online_map cpu_present_mask
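On the bitops.h side (below), the scheduler needs __ffs(), find-first-set, next to the existing ffz(). Pre-EV67 parts have no cttz, so the fallback leans on cmpbge $31,x,y, which sets bit i of the result exactly when byte i of x is all-zero: a byte-granular zero map. ffz_b() applied to that map gives the index of the lowest non-zero byte, extbl extracts that byte, and a second byte-wide scan finds the bit inside it; __ffs(x) is essentially ffz(~x) with the complement folded into the byte scans. A throwaway user-space model of that path (zero_byte_mask(), first_zero_bit() and my_ffs() are illustrative stand-ins for the instructions and the kernel helpers, and a 64-bit long is assumed, as on alpha):

#include <assert.h>
#include <stdio.h>

/* Model of "cmpbge $31,x,y": bit i of the result is set exactly
   when byte i of x is all-zero. */
static unsigned long zero_byte_mask(unsigned long x)
{
	unsigned long mask = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (!((x >> (8 * i)) & 0xff))
			mask |= 1UL << i;
	return mask;
}

/* Model of ffz_b(): lowest zero bit; callers guarantee one exists
   in the low byte. */
static unsigned long first_zero_bit(unsigned long x)
{
	unsigned long n = 0;

	while (x & 1) {
		x >>= 1;
		n++;
	}
	return n;
}

static unsigned long my_ffs(unsigned long word)
{
	unsigned long bits = zero_byte_mask(word);	    /* cmpbge $31,word */
	unsigned long qofs = first_zero_bit(bits);	    /* lowest non-zero byte */
	unsigned long byte = (word >> (qofs * 8)) & 0xff;   /* extbl word,qofs */
	unsigned long bofs = first_zero_bit(~byte);	    /* lowest set bit in it */

	return qofs * 8 + bofs;
}

int main(void)
{
	unsigned long i;

	for (i = 0; i < 64; i++)
		assert(my_ffs((1UL << i) | (1UL << 63)) == i);
	printf("__ffs fallback logic OK\n");
	return 0;
}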
--- 2.4.19pre9aa1/include/asm-alpha/bitops.h.~1~	Thu May 30 00:00:34 2002
+++ 2.4.19pre9aa1/include/asm-alpha/bitops.h	Thu May 30 01:08:25 2002
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include
 
 /*
  * Copyright 1994, Linus Torvalds.
@@ -74,6 +75,17 @@ clear_bit(unsigned long nr, volatile voi
  * WARNING: non atomic version.
  */
 static __inline__ void
+__clear_bit(unsigned long nr, volatile void * addr)
+{
+	int *m = ((int *) addr) + (nr >> 5);
+
+	*m &= ~(1 << (nr & 31));
+}
+
+/*
+ * WARNING: non atomic version.
+ */
+static __inline__ void
 __change_bit(unsigned long nr, volatile void * addr)
 {
 	int *m = ((int *) addr) + (nr >> 5);
@@ -264,6 +276,28 @@ static inline unsigned long ffz(unsigned
 #endif
 }
 
+/*
+ * __ffs = Find First set bit in word.  Undefined if no set bit exists.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+#if defined(__alpha_cix__) && defined(__alpha_fix__)
+	/* Whee.  EV67 can calculate it directly.  */
+	unsigned long result;
+	__asm__("cttz %1,%0" : "=r"(result) : "r"(word));
+	return result;
+#else
+	unsigned long bits, qofs, bofs;
+
+	__asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
+	qofs = ffz_b(bits);
+	bits = __kernel_extbl(word, qofs);
+	bofs = ffz_b(~bits);
+
+	return qofs*8 + bofs;
+#endif
+}
+
 #ifdef __KERNEL__
 
 /*
@@ -365,13 +399,77 @@ found_middle:
 }
 
 /*
- * The optimizer actually does good code for this case..
+ * Find next one bit in a bitmap reasonably efficiently.
+ */
+static inline unsigned long
+find_next_bit(void * addr, unsigned long size, unsigned long offset)
+{
+	unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
+	unsigned long result = offset & ~63UL;
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset &= 63UL;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= ~0UL << offset;
+		if (size < 64)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= 64;
+		result += 64;
+	}
+	while (size & ~63UL) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += 64;
+		size -= 64;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+found_first:
+	tmp &= ~0UL >> (64 - size);
+	if (!tmp)
+		return result + size;
+found_middle:
+	return result + __ffs(tmp);
+}
+
+/*
+ * The optimizer actually does good code for this case.
  */
 #define find_first_zero_bit(addr, size) \
 	find_next_zero_bit((addr), (size), 0)
+#define find_first_bit(addr, size) \
+	find_next_bit((addr), (size), 0)
 
 #ifdef __KERNEL__
 
+/*
+ * Every architecture must define this function.  It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set.  It's guaranteed that at least one of the 140
+ * bits is set.
+ */
+static inline unsigned long
+sched_find_first_bit(unsigned long b[3])
+{
+	unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
+	unsigned long ofs;
+
+	ofs = (b1 ? 64 : 128);
+	b1 = (b1 ? b1 : b2);
+	ofs = (b0 ? 0 : ofs);
+	b0 = (b0 ? b0 : b1);
+
+	return __ffs(b0) + ofs;
+}
+
+
 #define ext2_set_bit                 __test_and_set_bit
 #define ext2_clear_bit               __test_and_clear_bit
 #define ext2_test_bit                test_bit
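sched_find_first_bit() above is what the O(1) scheduler runs on every reschedule: the 140 bits are the per-runqueue priority bitmap (100 realtime levels plus 40 nice levels), so "the first 100 bits are unlikely to be set" just means "no realtime tasks in the common case". The word selection is written with conditional expressions rather than ifs, which the compiler can turn into cmov on alpha, keeping the hot path branch-free, and only a single __ffs() is ever executed. A quick user-space check of that selection logic, with my__ffs() as a portable stand-in for the real __ffs() and the rest of the names invented for the test:

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Portable stand-in for __ffs(); assumes w != 0. */
static unsigned long my__ffs(unsigned long w)
{
	unsigned long n = 0;

	while (!(w & 1)) {
		w >>= 1;
		n++;
	}
	return n;
}

/* Same selection logic as sched_find_first_bit() above. */
static unsigned long my_sched_find_first_bit(unsigned long b[3])
{
	unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
	unsigned long ofs;

	ofs = (b1 ? 64 : 128);	/* word 1 empty -> result lives in word 2 */
	b1 = (b1 ? b1 : b2);
	ofs = (b0 ? 0 : ofs);	/* a non-empty word 0 always wins */
	b0 = (b0 ? b0 : b1);

	return my__ffs(b0) + ofs;
}

int main(void)
{
	unsigned long b[3];
	unsigned int prio;

	for (prio = 0; prio < 140; prio++) {
		memset(b, 0, sizeof(b));
		b[prio / 64] |= 1UL << (prio % 64);
		b[2] |= 1UL << (139 % 64);	/* a low-prio task is always runnable */
		assert(my_sched_find_first_bit(b) == prio);
	}
	printf("sched_find_first_bit logic OK\n");
	return 0;
}

Each priority is checked with a lower-priority bit also set (bit 139), to make sure the earliest word always wins.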