diff -urN dyn-ref/arch/alpha/kernel/process.c dyn/arch/alpha/kernel/process.c --- dyn-ref/arch/alpha/kernel/process.c Tue Jan 22 18:54:09 2002 +++ dyn/arch/alpha/kernel/process.c Fri May 3 16:41:11 2002 @@ -75,7 +75,7 @@ { /* An endless idle loop with no priority at all. */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { /* FIXME -- EV6 and LCA45 know how to power down diff -urN dyn-ref/arch/arm/kernel/process.c dyn/arch/arm/kernel/process.c --- dyn-ref/arch/arm/kernel/process.c Fri May 3 02:11:57 2002 +++ dyn/arch/arm/kernel/process.c Fri May 3 16:41:11 2002 @@ -84,7 +84,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { void (*idle)(void) = pm_idle; diff -urN dyn-ref/arch/cris/kernel/process.c dyn/arch/cris/kernel/process.c --- dyn-ref/arch/cris/kernel/process.c Mon Feb 25 22:05:04 2002 +++ dyn/arch/cris/kernel/process.c Fri May 3 16:41:11 2002 @@ -125,7 +125,8 @@ int cpu_idle(void *unused) { while(1) { - current->counter = -100; + current->nice = 20; + current->dyn_prio = -100; schedule(); } } diff -urN dyn-ref/arch/i386/kernel/process.c dyn/arch/i386/kernel/process.c --- dyn-ref/arch/i386/kernel/process.c Mon Feb 25 22:05:04 2002 +++ dyn/arch/i386/kernel/process.c Fri May 3 16:41:11 2002 @@ -126,7 +126,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { void (*idle)(void) = pm_idle; diff -urN dyn-ref/arch/ia64/kernel/process.c dyn/arch/ia64/kernel/process.c --- dyn-ref/arch/ia64/kernel/process.c Fri May 3 02:11:58 2002 +++ dyn/arch/ia64/kernel/process.c Fri May 3 16:41:11 2002 @@ -128,8 +128,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; - + current->dyn_prio = -100; while (1) { #ifdef CONFIG_SMP diff -urN dyn-ref/arch/m68k/kernel/process.c 
dyn/arch/m68k/kernel/process.c --- dyn-ref/arch/m68k/kernel/process.c Fri May 3 02:12:03 2002 +++ dyn/arch/m68k/kernel/process.c Fri May 3 16:41:11 2002 @@ -81,7 +81,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; idle(); } diff -urN dyn-ref/arch/mips/kernel/process.c dyn/arch/mips/kernel/process.c --- dyn-ref/arch/mips/kernel/process.c Fri May 3 02:12:05 2002 +++ dyn/arch/mips/kernel/process.c Fri May 3 16:41:12 2002 @@ -37,7 +37,7 @@ { /* endless idle loop with no priority at all */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); while (1) { diff -urN dyn-ref/arch/mips64/kernel/process.c dyn/arch/mips64/kernel/process.c --- dyn-ref/arch/mips64/kernel/process.c Fri May 3 02:12:05 2002 +++ dyn/arch/mips64/kernel/process.c Fri May 3 16:41:12 2002 @@ -35,7 +35,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { while (!current->need_resched) if (cpu_wait) diff -urN dyn-ref/arch/parisc/kernel/process.c dyn/arch/parisc/kernel/process.c --- dyn-ref/arch/parisc/kernel/process.c Thu Feb 22 03:44:54 2001 +++ dyn/arch/parisc/kernel/process.c Fri May 3 16:41:12 2002 @@ -71,7 +71,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { while (!current->need_resched) { diff -urN dyn-ref/arch/ppc/8260_io/uart.c dyn/arch/ppc/8260_io/uart.c --- dyn-ref/arch/ppc/8260_io/uart.c Fri May 3 02:12:05 2002 +++ dyn/arch/ppc/8260_io/uart.c Fri May 3 16:41:12 2002 @@ -1732,7 +1732,7 @@ printk("lsr = %d (jiff=%lu)...", lsr, jiffies); #endif current->state = TASK_INTERRUPTIBLE; -/* current->counter = 0; make us low-priority */ +/* current->dyn_prio = 0; make us low-priority */ schedule_timeout(char_time); if (signal_pending(current)) break; diff -urN 
dyn-ref/arch/ppc/8xx_io/uart.c dyn/arch/ppc/8xx_io/uart.c --- dyn-ref/arch/ppc/8xx_io/uart.c Fri May 3 02:12:05 2002 +++ dyn/arch/ppc/8xx_io/uart.c Fri May 3 16:41:12 2002 @@ -1796,7 +1796,7 @@ printk("lsr = %d (jiff=%lu)...", lsr, jiffies); #endif current->state = TASK_INTERRUPTIBLE; -/* current->counter = 0; make us low-priority */ +/* current->dyn_prio = 0; make us low-priority */ schedule_timeout(char_time); if (signal_pending(current)) break; diff -urN dyn-ref/arch/ppc/kernel/idle.c dyn/arch/ppc/kernel/idle.c --- dyn-ref/arch/ppc/kernel/idle.c Fri May 3 02:12:06 2002 +++ dyn/arch/ppc/kernel/idle.c Fri May 3 16:41:12 2002 @@ -56,7 +56,7 @@ /* endless loop with no priority at all */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); for (;;) { #ifdef CONFIG_SMP diff -urN dyn-ref/arch/s390/kernel/process.c dyn/arch/s390/kernel/process.c --- dyn-ref/arch/s390/kernel/process.c Fri May 3 02:12:06 2002 +++ dyn/arch/s390/kernel/process.c Fri May 3 16:41:12 2002 @@ -58,7 +58,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { if (current->need_resched) { schedule(); diff -urN dyn-ref/arch/s390x/kernel/process.c dyn/arch/s390x/kernel/process.c --- dyn-ref/arch/s390x/kernel/process.c Fri May 3 02:12:06 2002 +++ dyn/arch/s390x/kernel/process.c Fri May 3 16:41:12 2002 @@ -58,7 +58,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { if (current->need_resched) { schedule(); diff -urN dyn-ref/arch/sh/kernel/process.c dyn/arch/sh/kernel/process.c --- dyn-ref/arch/sh/kernel/process.c Tue Jan 22 18:54:51 2002 +++ dyn/arch/sh/kernel/process.c Fri May 3 16:41:12 2002 @@ -41,7 +41,7 @@ /* endless idle loop with no priority at all */ init_idle(); current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; while (1) { if (hlt_counter) 
{ diff -urN dyn-ref/arch/sparc/kernel/process.c dyn/arch/sparc/kernel/process.c --- dyn-ref/arch/sparc/kernel/process.c Fri May 3 02:12:06 2002 +++ dyn/arch/sparc/kernel/process.c Fri May 3 16:41:12 2002 @@ -75,7 +75,7 @@ /* endless idle loop with no priority at all */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); for (;;) { @@ -129,7 +129,7 @@ { /* endless idle loop with no priority at all */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); while(1) { diff -urN dyn-ref/arch/sparc64/kernel/process.c dyn/arch/sparc64/kernel/process.c --- dyn-ref/arch/sparc64/kernel/process.c Fri May 3 02:12:07 2002 +++ dyn/arch/sparc64/kernel/process.c Fri May 3 16:41:12 2002 @@ -54,7 +54,7 @@ /* endless idle loop with no priority at all */ current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); for (;;) { @@ -84,7 +84,7 @@ int cpu_idle(void) { current->nice = 20; - current->counter = -100; + current->dyn_prio = -100; init_idle(); while(1) { diff -urN dyn-ref/drivers/net/slip.c dyn/drivers/net/slip.c --- dyn-ref/drivers/net/slip.c Mon Feb 25 22:05:07 2002 +++ dyn/drivers/net/slip.c Fri May 3 16:41:12 2002 @@ -1394,7 +1394,7 @@ */ do { if (busy) { - current->counter = 0; + current->time_slice = 0; schedule(); } diff -urN dyn-ref/fs/proc/array.c dyn/fs/proc/array.c --- dyn-ref/fs/proc/array.c Fri May 3 16:40:52 2002 +++ dyn/fs/proc/array.c Fri May 3 16:41:12 2002 @@ -339,8 +339,7 @@ /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ - priority = task->counter; - priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER; + priority = task->dyn_prio; nice = task->nice; read_lock(&tasklist_lock); diff -urN dyn-ref/include/linux/sched.h dyn/include/linux/sched.h --- dyn-ref/include/linux/sched.h Fri May 3 16:40:56 2002 +++ dyn/include/linux/sched.h Fri May 3 16:41:46 2002 @@ -153,6 +153,7 @@ 
extern void update_process_times(int user); extern void update_one_process(struct task_struct *p, unsigned long user, unsigned long system, int cpu); +extern void FASTCALL(expire_task(struct task_struct *p)); /* per-tick timeslice accounting, defined in kernel/sched.c */ #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); @@ -315,7 +316,7 @@ * all fields in a single cacheline that are needed for * the goodness() loop in schedule(). */ - volatile int counter; + int dyn_prio; /* dynamic priority; idle tasks are set to -100 */ int nice; unsigned int policy; struct mm_struct *mm; @@ -334,6 +335,9 @@ * that's just fine.) */ struct list_head run_list; + volatile long time_slice; /* ticks left in the current timeslice */ + /* rcl_curr as sampled by del_from_runqueue() */ + unsigned long rcl_last; #ifdef CONFIG_NUMA_SCHED int nid; #endif @@ -463,8 +467,30 @@ */ #define _STK_LIM (8*1024*1024) -#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */ -#define MAX_COUNTER (20*HZ/100) +/* + * Scheduling quanta. + * + * NOTE! The unix "nice" value influences how long a timeslice + * a process gets. The nice value ranges from -20 to +19, where + * a -20 is a "high-priority" task, and a "+10" is a low-priority + * task. + * At HZ=100 the time slice for zero-nice tasks will be 50msec, + * -20 will get 90msec and +19 10msec. 
+ */ +#define NICE_RANGE 40 /* number of nice levels */ +#define MIN_NICE_TSLICE 10000 /* usec, slice for the weakest nice level */ +#define MAX_NICE_TSLICE 100000 /* usec, nominal slice for the strongest nice level */ +#define TASK_TIMESLICE(p) \ +({ \ + unsigned int slot = 19 - (p)->nice; /* slot 0 is nice 19, slot 39 is nice minus 20 */ \ + if (slot >= NICE_RANGE) /* unsigned, so a nice above 19 wraps around and lands here too */ \ + BUG(); \ + (int) ts_table[slot]; /* value is in ticks; ts_table is static to kernel/sched.c */ \ +}) + +#define MAX_TSLICE (MAX_NICE_TSLICE * HZ / 1000000) /* usec converted to ticks */ +/* ceiling for dyn_prio: twice the longest timeslice, in ticks */ +#define MAX_DYNPRIO (MAX_TSLICE * 2) #define DEF_NICE (0) asmlinkage long sys_sched_yield(void); @@ -487,14 +513,12 @@ addr_limit: KERNEL_DS, \ exec_domain: &default_exec_domain, \ lock_depth: -1, \ - counter: DEF_COUNTER, \ nice: DEF_NICE, \ policy: SCHED_OTHER, \ mm: NULL, \ active_mm: &init_mm, \ cpus_runnable: -1UL, \ cpus_allowed: -1UL, \ - run_list: LIST_HEAD_INIT(tsk.run_list), \ next_task: &tsk, \ prev_task: &tsk, \ p_opptr: &tsk, \ @@ -923,11 +947,14 @@ #define thread_group_leader(p) (p->pid == p->tgid) +extern unsigned long rcl_curr; /* recalculation counter, defined in kernel/sched.c */ + #define del_from_runqueue(p) \ do { \ nr_running_dec(); \ list_del(&(p)->run_list); \ (p)->run_list.next = NULL; \ + (p)->rcl_last = rcl_curr; /* remember the recalculation count at dequeue time */ \ } while(0) static inline int task_on_runqueue(struct task_struct *p) diff -urN dyn-ref/kernel/exit.c dyn/kernel/exit.c --- dyn-ref/kernel/exit.c Fri May 3 16:40:33 2002 +++ dyn/kernel/exit.c Fri May 3 16:41:12 2002 @@ -64,9 +64,9 @@ * was given away by the parent in the first place.) */ if (p->get_child_timeslice) { - current->counter += p->counter; - if (current->counter >= MAX_COUNTER) - current->counter = MAX_COUNTER; + current->time_slice += p->time_slice; + if (current->time_slice > MAX_TSLICE) + current->time_slice = MAX_TSLICE; } p->pid = 0; free_task_struct(p); diff -urN dyn-ref/kernel/fork.c dyn/kernel/fork.c --- dyn-ref/kernel/fork.c Fri May 3 16:40:56 2002 +++ dyn/kernel/fork.c Fri May 3 16:41:12 2002 @@ -761,9 +761,9 @@ * to do a few simple things and then exec(). 
*/ { - int counter = current->counter; - p->counter = (counter + 1) >> 1; - current->counter = counter >> 1; + int time_slice = current->time_slice; + p->time_slice = (time_slice + 1) >> 1; + current->time_slice = time_slice >> 1; p->policy &= ~SCHED_YIELD; current->policy |= SCHED_YIELD; current->need_resched = 1; diff -urN dyn-ref/kernel/sched.c dyn/kernel/sched.c --- dyn-ref/kernel/sched.c Fri May 3 16:40:33 2002 +++ dyn/kernel/sched.c Fri May 3 16:41:12 2002 @@ -46,30 +46,9 @@ extern void mem_use(void); -/* - * Scheduling quanta. - * - * NOTE! The unix "nice" value influences how long a process - * gets. The nice value ranges from -20 to +19, where a -20 - * is a "high-priority" task, and a "+10" is a low-priority - * task. - * - * We want the time-slice to be around 50ms or so, so this - * calculation depends on the value of HZ. - */ -#if HZ < 200 -#define TICK_SCALE(x) ((x) >> 2) -#elif HZ < 400 -#define TICK_SCALE(x) ((x) >> 1) -#elif HZ < 800 -#define TICK_SCALE(x) (x) -#elif HZ < 1600 -#define TICK_SCALE(x) ((x) << 1) -#else -#define TICK_SCALE(x) ((x) << 2) -#endif +static unsigned char ts_table[NICE_RANGE]; -#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1) +#define MM_AFFINITY_BONUS 1 /* @@ -93,6 +72,8 @@ spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ +unsigned long rcl_curr; + #ifndef CONFIG_NUMA_SCHED static LIST_HEAD(runqueue_head); @@ -176,10 +157,11 @@ * Don't do any other calculations if the time slice is * over.. */ - weight = p->counter; - if (!weight) - goto out; - + if (!p->time_slice) + return 0; + + weight = p->dyn_prio + 1; + #ifdef CONFIG_SMP /* Give a largish advantage to the same processor... */ /* (this is equivalent to penalizing other processors) */ @@ -189,7 +171,7 @@ /* .. 
and a slight advantage to the current MM */ if (p->mm == this_mm || !p->mm) - weight += 1; + weight += MM_AFFINITY_BONUS; weight += 20 - p->nice; goto out; } @@ -359,6 +341,12 @@ */ static inline void add_to_runqueue(struct task_struct * p) { + unsigned int dyn_prio = rcl_curr - p->rcl_last; /* recalculation passes since rcl_last was stamped */ + if (dyn_prio > MAX_DYNPRIO) + dyn_prio = MAX_DYNPRIO; /* bound the bonus before adding it */ + p->dyn_prio += dyn_prio; + if (p->dyn_prio > MAX_DYNPRIO) + p->dyn_prio = MAX_DYNPRIO; /* and bound the accumulated priority */ list_add(&p->run_list, numa_runqueue_head(p->nid)); nr_running_inc(); } @@ -571,6 +559,30 @@ __schedule_tail(prev); } +void expire_task(struct task_struct *p) +{ + /* + * Per-tick timeslice accounting, called from kernel/timer.c. + * Only idle tasks have dyn_prio < 0 and for them the timeslice + * doesn't matter: they are rescheduled only by reschedule_idle(). + */ + if (unlikely(p->dyn_prio < 0)) { + if (p != idle_task(smp_processor_id())) + BUG(); + return; + } + + if (unlikely(!p->time_slice)) /* slice already used up: only request a reschedule */ + goto need_resched; + + if (!--p->time_slice) { /* this tick consumed the last of the slice */ + if (p->dyn_prio) + p->dyn_prio--; /* an expired slice costs one point of dyn_prio */ + need_resched: + p->need_resched = 1; + } +} + /* * 'schedule()' is the scheduler function. It's a very simple and nice * scheduler: it's not perfect, but certainly works for most things. @@ -609,20 +621,20 @@ /* move an exhausted RR process to be last.. */ if (unlikely(prev->policy == SCHED_RR)) - if (!prev->counter) { - prev->counter = NICE_TO_TICKS(prev->nice); + if (!prev->time_slice) { + prev->time_slice = TASK_TIMESLICE(prev); move_last_runqueue(prev); } switch (prev->state) { - case TASK_INTERRUPTIBLE: - if (signal_pending(prev)) { - prev->state = TASK_RUNNING; - break; - } - default: - del_from_runqueue(prev); - case TASK_RUNNING:; + case TASK_INTERRUPTIBLE: + if (signal_pending(prev)) { + prev->state = TASK_RUNNING; + break; + } + default: + del_from_runqueue(prev); + case TASK_RUNNING:; } prev->need_resched = 0; @@ -668,20 +680,25 @@ /* Do we need to re-calculate counters? 
*/ if (unlikely(!c)) { - struct task_struct *p; - - spin_unlock_irq(&runqueue_lock); - read_lock(&tasklist_lock); - for_each_task(p) { + ++rcl_curr; + list_for_each(tmp, numa_runqueue_head(numa_node_id())) { + p = list_entry(tmp, struct task_struct, run_list); + p->time_slice = TASK_TIMESLICE(p); + } #ifdef CONFIG_NUMA_SCHED - if (!recalculate_all && p->nid != numa_node_id()) - continue; -#endif - p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); - p->get_child_timeslice = 0; + if (recalculate_all) { + int nid; + + for (nid = 0; nid < numnodes; nid++) { + if (nid == numa_node_id()) + continue; + list_for_each(tmp, numa_runqueue_head(nid)) { + p = list_entry(tmp, struct task_struct, run_list); + p->time_slice = TASK_TIMESLICE(p); + } + } } - read_unlock(&tasklist_lock); - spin_lock_irq(&runqueue_lock); +#endif goto repeat_schedule; } @@ -1191,7 +1208,7 @@ read_lock(&tasklist_lock); p = find_process_by_pid(pid); if (p) - jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice), + jiffies_to_timespec(p->policy & SCHED_FIFO ? 
0 : TASK_TIMESLICE(p), &t); read_unlock(&tasklist_lock); if (p) @@ -1491,6 +1508,18 @@ extern void init_timervecs (void); +static void __init fill_tslice_map(void) /* fills ts_table once at boot; only sched_init() calls it, hence __init */ +{ + int i; + + for (i = 0; i < NICE_RANGE; i++) { /* slot i is the one TASK_TIMESLICE() picks for nice 19 minus i */ + ts_table[i] = ((MIN_NICE_TSLICE + + ((MAX_NICE_TSLICE - + MIN_NICE_TSLICE) / (NICE_RANGE - 1)) * i) * HZ) / 1000000; /* usec to ticks; the per-slot step is truncated before scaling, so the last slot ends just under MAX_NICE_TSLICE */ + if (!ts_table[i]) ts_table[i] = 1; /* never hand out a zero-tick slice */ + } +} + void __init sched_init(void) { /* @@ -1504,6 +1533,8 @@ for(nr = 0; nr < PIDHASH_SZ; nr++) pidhash[nr] = NULL; + + fill_tslice_map(); init_timervecs(); diff -urN dyn-ref/kernel/timer.c dyn/kernel/timer.c --- dyn-ref/kernel/timer.c Fri May 3 16:40:32 2002 +++ dyn/kernel/timer.c Fri May 3 16:41:12 2002 @@ -599,10 +599,7 @@ update_one_process(p, user_tick, system, cpu); if (p->pid) { - if (--p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } + expire_task(p); if (p->nice > 0) kstat.per_cpu_nice[cpu] += user_tick; else diff -urN dyn-ref/mm/oom_kill.c dyn/mm/oom_kill.c --- dyn-ref/mm/oom_kill.c Fri May 3 16:40:33 2002 +++ dyn/mm/oom_kill.c Fri May 3 16:41:12 2002 @@ -148,7 +148,8 @@ * all the memory it needs. That way it should be able to * exit() and clear out its resources quickly... */ - p->counter = 5 * HZ; + p->time_slice = 2 * MAX_TSLICE; + p->dyn_prio = MAX_DYNPRIO + 1; /* one above the cap that add_to_runqueue() applies */ /* This process has hardware access, be more careful. */ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {