diff -urNp x-ref/arch/alpha/kernel/time.c x/arch/alpha/kernel/time.c
--- x-ref/arch/alpha/kernel/time.c	2003-01-29 06:13:17.000000000 +0100
+++ x/arch/alpha/kernel/time.c	2003-01-30 22:34:17.000000000 +0100
@@ -48,7 +48,7 @@
 #include "proto.h"
 #include "irq_impl.h"
 
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 extern unsigned long wall_jiffies;	/* kernel/timer.c */
 
 static int set_rtc_mmss(unsigned long);
@@ -101,7 +101,7 @@ void timer_interrupt(int irq, void *dev,
 		alpha_do_profile(regs->pc);
 #endif
 
-	write_lock(&xtime_lock);
+	fr_write_lock(&xtime_lock);
 
 	/*
 	 * Calculate how many ticks have passed since the last update,
@@ -133,7 +133,7 @@ void timer_interrupt(int irq, void *dev,
 			state.last_rtc_update = xtime.tv_sec - (tmp ? 600 : 0);
 	}
 
-	write_unlock(&xtime_lock);
+	fr_write_unlock(&xtime_lock);
 }
 
 void
@@ -392,18 +392,19 @@ time_init(void)
 void
 do_gettimeofday(struct timeval *tv)
 {
-	unsigned long sec, usec, lost, flags;
+	unsigned long sec, usec, lost, seq;
 	unsigned long delta_cycles, delta_usec, partial_tick;
 
-	read_lock_irqsave(&xtime_lock, flags);
+	do {
+		seq = fr_read_begin(&xtime_lock);
 
-	delta_cycles = rpcc() - state.last_time;
-	sec = xtime.tv_sec;
-	usec = xtime.tv_usec;
-	partial_tick = state.partial_tick;
-	lost = jiffies - wall_jiffies;
+		delta_cycles = rpcc() - state.last_time;
+		sec = xtime.tv_sec;
+		usec = xtime.tv_usec;
+		partial_tick = state.partial_tick;
+		lost = jiffies - wall_jiffies;
 
-	read_unlock_irqrestore(&xtime_lock, flags);
+	} while (seq != fr_read_end(&xtime_lock));
 
 #ifdef CONFIG_SMP
 	/* Until and unless we figure out how to get cpu cycle counters
@@ -445,7 +446,7 @@ do_settimeofday(struct timeval *tv)
 	unsigned long delta_usec;
 	long sec, usec;
 
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 
 	/* The offset that is added into time in do_gettimeofday above
 	   must be subtracted out here to keep a coherent view of the
@@ -476,7 +477,7 @@ do_settimeofday(struct timeval *tv)
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
 
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
diff -urNp x-ref/arch/i386/kernel/time.c x/arch/i386/kernel/time.c
--- x-ref/arch/i386/kernel/time.c	2003-01-29 06:13:21.000000000 +0100
+++ x/arch/i386/kernel/time.c	2003-01-30 22:37:24.000000000 +0100
@@ -42,6 +42,7 @@
 #include
 #include
 #include
+#include <linux/frlock.h>
 
 #include
 #include
@@ -79,7 +80,7 @@ static unsigned long last_tsc_low;	/* ls
  */
 unsigned long fast_gettimeoffset_quotient;
 
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 extern unsigned long wall_jiffies;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
@@ -158,6 +159,7 @@ extern spinlock_t i8259A_lock;
 static unsigned long do_slow_gettimeoffset(void)
 {
 	int count;
+	unsigned long flags;
 
 	static int count_p = LATCH;    /* for the first call after boot */
 	static unsigned long jiffies_p = 0;
@@ -168,7 +170,7 @@ static unsigned long do_slow_gettimeoffs
 	unsigned long jiffies_t;
 
 	/* gets recalled with irq locally disabled */
-	spin_lock(&i8253_lock);
+	spin_lock_irqsave(&i8253_lock, flags);
 	/* timer count may underflow right here */
 	outb_p(0x00, 0x43);	/* latch the count ASAP */
@@ -190,7 +192,7 @@ static unsigned long do_slow_gettimeoffs
 		count = LATCH - 1;
 	}
 
-	spin_unlock(&i8253_lock);
+	spin_unlock_irqrestore(&i8253_lock, flags);
 
 	/*
	 * avoiding timer inconsistencies (they are rare, but they happen)...
@@ -211,13 +213,13 @@ static unsigned long do_slow_gettimeoffs
 			int i;
 
-			spin_lock(&i8259A_lock);
+			spin_lock_irqsave(&i8259A_lock, flags);
 			/*
 			 * This is tricky when I/O APICs are used;
 			 * see do_timer_interrupt().
 			 */
 			i = inb(0x20);
-			spin_unlock(&i8259A_lock);
+			spin_unlock_irqrestore(&i8259A_lock, flags);
 
 			/* assumption about timer being IRQ0 */
 			if (i & 0x01) {
@@ -441,19 +443,21 @@ void __cyclone_delay(unsigned long loops
  */
 void do_gettimeofday(struct timeval *tv)
 {
-	unsigned long flags;
+	unsigned long seq;
 	unsigned long usec, sec;
 
-	read_lock_irqsave(&xtime_lock, flags);
-	usec = do_gettimeoffset();
-	{
-		unsigned long lost = jiffies - wall_jiffies;
-		if (lost)
-			usec += lost * (1000000 / HZ);
-	}
-	sec = xtime.tv_sec;
-	usec += xtime.tv_usec;
-	read_unlock_irqrestore(&xtime_lock, flags);
+	do {
+		seq = fr_read_begin(&xtime_lock);
+		usec = do_gettimeoffset();
+		{
+			unsigned long lost = jiffies - wall_jiffies;
+			if (lost)
+				usec += lost * (1000000 / HZ);
+		}
+
+		sec = xtime.tv_sec;
+		usec += xtime.tv_usec;
+	} while (seq != fr_read_end(&xtime_lock));
 
 	while (usec >= 1000000) {
 		usec -= 1000000;
@@ -466,7 +470,7 @@ void do_gettimeofday(struct timeval *tv)
 
 void do_settimeofday(struct timeval *tv)
 {
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	/*
 	 * This is revolting. We need to set "xtime" correctly. However, the
 	 * value in this location is the value at the most recent update of
@@ -486,7 +490,7 @@ void do_settimeofday(struct timeval *tv)
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -652,7 +656,7 @@ static void timer_interrupt(int irq, voi
 	 * the irq version of write_lock because as just said we have irq
 	 * locally disabled. -arca
 	 */
-	write_lock(&xtime_lock);
+	fr_write_lock(&xtime_lock);
 
 	if(use_cyclone)
 		mark_timeoffset_cyclone();
@@ -708,7 +712,7 @@ static void timer_interrupt(int irq, voi
 
 	do_timer_interrupt(irq, NULL, regs);
 
-	write_unlock(&xtime_lock);
+	fr_write_unlock(&xtime_lock);
 }
 
diff -urNp x-ref/arch/ia64/kernel/time.c x/arch/ia64/kernel/time.c
--- x-ref/arch/ia64/kernel/time.c	2002-11-29 02:22:55.000000000 +0100
+++ x/arch/ia64/kernel/time.c	2003-01-30 22:34:17.000000000 +0100
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <linux/frlock.h>
 
 #include
 #include
@@ -23,7 +24,7 @@
 #include
 #include
 
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 extern unsigned long wall_jiffies;
 extern unsigned long last_time_offset;
@@ -86,7 +87,7 @@ gettimeoffset (void)
 void
 do_settimeofday (struct timeval *tv)
 {
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	{
 		/*
 		 * This is revolting. We need to set "xtime" correctly. However, the value
@@ -108,16 +109,16 @@ do_settimeofday (struct timeval *tv)
 		time_maxerror = NTP_PHASE_LIMIT;
 		time_esterror = NTP_PHASE_LIMIT;
 	}
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
 void
 do_gettimeofday (struct timeval *tv)
 {
-	unsigned long flags, usec, sec, old;
+	unsigned long seq, usec, sec, old;
 
-	read_lock_irqsave(&xtime_lock, flags);
-	{
+	do {
+		seq = fr_read_begin(&xtime_lock);
 		usec = gettimeoffset();
 
 		/*
@@ -134,8 +135,7 @@ do_gettimeofday (struct timeval *tv)
 		sec = xtime.tv_sec;
 		usec += xtime.tv_usec;
-	}
-	read_unlock_irqrestore(&xtime_lock, flags);
+	} while (seq != fr_read_end(&xtime_lock));
 
 	while (usec >= 1000000) {
 		usec -= 1000000;
@@ -178,10 +178,10 @@ timer_interrupt(int irq, void *dev_id, s
 		 * another CPU. We need to avoid to SMP race by acquiring the
 		 * xtime_lock.
 		 */
-		write_lock(&xtime_lock);
+		fr_write_lock(&xtime_lock);
 		do_timer(regs);
 		local_cpu_data->itm_next = new_itm;
-		write_unlock(&xtime_lock);
+		fr_write_unlock(&xtime_lock);
 	} else
 		local_cpu_data->itm_next = new_itm;
diff -urNp x-ref/arch/x86_64/kernel/time.c x/arch/x86_64/kernel/time.c
--- x-ref/arch/x86_64/kernel/time.c	2003-01-29 06:13:33.000000000 +0100
+++ x/arch/x86_64/kernel/time.c	2003-01-30 22:36:08.000000000 +0100
@@ -22,7 +22,7 @@
 #include
 #include
 
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
 spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
@@ -90,17 +90,22 @@ unsigned int (*do_gettimeoffset_safe)(vo
 
 void do_gettimeofday(struct timeval *tv)
 {
-	unsigned long flags;
+	unsigned long flags, t, seq;
 	unsigned int sec, usec;
 
-	read_lock_irqsave(&xtime_lock, flags);
+	spin_lock_irqsave(&time_offset_lock, flags);
+	do {
+		seq = fr_read_begin(&xtime_lock);
+
+		sec = xtime.tv_sec;
+		usec = xtime.tv_usec;
 
-	sec = xtime.tv_sec;
-	usec = xtime.tv_usec
-		+ (jiffies - wall_jiffies) * tick
-		+ do_gettimeoffset_safe();
+		t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
+		if (t > timeoffset) timeoffset = t;
+		usec += timeoffset;
 
-	read_unlock_irqrestore(&xtime_lock, flags);
+	} while (seq != fr_read_end(&xtime_lock));
+	spin_unlock_irqrestore(&time_offset_lock, flags);
 
 	tv->tv_sec = sec + usec / 1000000;
 	tv->tv_usec = usec % 1000000;
@@ -114,7 +119,7 @@ void do_gettimeofday(struct timeval *tv)
 
 void do_settimeofday(struct timeval *tv)
 {
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	vxtime_lock();
 
 	tv->tv_usec -= (jiffies - wall_jiffies) * tick
@@ -133,7 +138,7 @@ void do_settimeofday(struct timeval *tv)
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
 
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -216,7 +221,7 @@ static void timer_interrupt(int irq, voi
 	 * variables, because both do_timer() and us change them -arca+vojtech
 	 */
 
-	write_lock(&xtime_lock);
+	fr_write_lock(&xtime_lock);
 	vxtime_lock();
 
 	{
@@ -292,7 +297,7 @@ static void timer_interrupt(int irq, voi
 	}
 
 	vxtime_unlock();
-	write_unlock(&xtime_lock);
+	fr_write_unlock(&xtime_lock);
 }
 
 static unsigned long get_cmos_time(void)
diff -urNp x-ref/include/linux/frlock.h x/include/linux/frlock.h
--- x-ref/include/linux/frlock.h	1970-01-01 01:00:00.000000000 +0100
+++ x/include/linux/frlock.h	2003-01-30 22:34:17.000000000 +0100
@@ -0,0 +1,100 @@
+#ifndef __LINUX_FRLOCK_H
+#define __LINUX_FRLOCK_H
+
+/*
+ * Fast read-write spinlocks.
+ *
+ * Fast reader/writer locks without starving writers. This type of
+ * lock is for data where the reader wants a consistent set of information
+ * and is willing to retry if the information changes. Readers never
+ * block but they may have to retry if a writer is in
+ * progress. Writers do not wait for readers.
+ *
+ * Generalization of the sequence variables used for gettimeofday
+ * on x86-64 by Andrea Arcangeli
+ *
+ * This is not as cache friendly as brlock. Also, this will not work
+ * for data that contains pointers, because any writer could
+ * invalidate a pointer that a reader was following.
+ *
+ * Expected reader usage:
+ *	do {
+ *		seq = fr_read_begin();
+ *		...
+ *	} while (seq != fr_read_end());
+ *
+ * On non-SMP the spin locks disappear but the writer still needs
+ * to increment the sequence variables because an interrupt routine could
+ * change the state of the data.
+ */
+
+#include <linux/spinlock.h>
+#include <asm/system.h>
+
+typedef struct {
+	spinlock_t lock;
+	unsigned pre_sequence;
+	unsigned post_sequence;
+} frlock_t;
+
+#define FR_LOCK_UNLOCKED { SPIN_LOCK_UNLOCKED, 0, 0 }
+#define frlock_init(x)	do { *(x) = (frlock_t) FR_LOCK_UNLOCKED; } while (0)
+
+static inline void fr_write_lock(frlock_t *rw)
+{
+	spin_lock(&rw->lock);
+	rw->pre_sequence++;
+	wmb();
+}
+
+static inline void fr_write_unlock(frlock_t *rw)
+{
+	wmb();
+	rw->post_sequence++;
+	spin_unlock(&rw->lock);
+}
+
+static inline int fr_write_trylock(frlock_t *rw)
+{
+	int ret = spin_trylock(&rw->lock);
+
+	if (ret) {
+		++rw->pre_sequence;
+		wmb();
+	}
+	return ret;
+}
+
+static inline unsigned fr_read_begin(frlock_t *rw)
+{
+	unsigned ret = rw->post_sequence;
+	rmb();
+	return ret;
+}
+
+static inline unsigned fr_read_end(frlock_t *rw)
+{
+	rmb();
+	return rw->pre_sequence;
+}
+
+/*
+ * Possible sw/hw IRQ protected versions of the interfaces.
+ */
+#define fr_write_lock_irqsave(lock, flags) \
+	do { local_irq_save(flags); fr_write_lock(lock); } while (0)
+#define fr_write_lock_irq(lock) \
+	do { local_irq_disable(); fr_write_lock(lock); } while (0)
+#define fr_write_lock_bh(lock) \
+	do { local_bh_disable(); fr_write_lock(lock); } while (0)
+
+#define fr_write_unlock_irqrestore(lock, flags) \
+	do { fr_write_unlock(lock); local_irq_restore(flags); } while(0)
+#define fr_write_unlock_irq(lock) \
+	do { fr_write_unlock(lock); local_irq_enable(); } while(0)
+#define fr_write_unlock_bh(lock) \
+	do { fr_write_unlock(lock); local_bh_enable(); } while(0)
+
+#endif /* __LINUX_FRLOCK_H */
diff -urNp x-ref/kernel/time.c x/kernel/time.c
--- x-ref/kernel/time.c	2002-11-29 02:23:18.000000000 +0100
+++ x/kernel/time.c	2003-01-30 22:34:17.000000000 +0100
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <linux/frlock.h>
 
 #include
 
@@ -38,7 +39,7 @@
 struct timezone sys_tz;
 
 /* The xtime_lock is not only serializing the xtime read/writes but
  * it's also serializing all accesses to the global NTP variables now.
  */
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 
 #if !defined(__alpha__) && !defined(__ia64__)
@@ -79,7 +80,7 @@ asmlinkage long sys_stime(int * tptr)
 		return -EPERM;
 	if (get_user(value, tptr))
 		return -EFAULT;
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	vxtime_lock();
 	xtime.tv_sec = value;
 	xtime.tv_usec = 0;
@@ -88,7 +89,7 @@ asmlinkage long sys_stime(int * tptr)
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 	return 0;
 }
 
@@ -127,11 +128,11 @@ asmlinkage long sys_gettimeofday(struct
  */
 inline static void warp_clock(void)
 {
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	vxtime_lock();
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
 	vxtime_unlock();
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -235,7 +236,7 @@ int do_adjtimex(struct timex *txc)
 		if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
 			return -EINVAL;
 
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	result = time_state;	/* mostly `TIME_OK' */
 
 	/* Save for later - semantics of adjtime is to return old value */
@@ -390,7 +391,7 @@ leave:	if ((time_status & (STA_UNSYNC|ST
 	txc->calcnt	   = pps_calcnt;
 	txc->errcnt	   = pps_errcnt;
 	txc->stbcnt	   = pps_stbcnt;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 	do_gettimeofday(&txc->time);
 	return(result);
 }
diff -urNp x-ref/kernel/timer.c x/kernel/timer.c
--- x-ref/kernel/timer.c	2003-01-30 22:33:54.000000000 +0100
+++ x/kernel/timer.c	2003-01-30 22:34:17.000000000 +0100
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <linux/frlock.h>
 
 #include
 
@@ -640,7 +641,7 @@ unsigned long wall_jiffies;
 /*
  * This spinlock protect us from races in SMP while playing with xtime. -arca
  */
-rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+frlock_t xtime_lock = FR_LOCK_UNLOCKED;
 
 static inline void update_times(void)
 {
@@ -651,7 +652,7 @@ static inline void update_times(void)
 	 * just know that the irqs are locally enabled and so we don't
 	 * need to save/restore the flags of the local CPU here. -arca
 	 */
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	vxtime_lock();
 
 	ticks = jiffies - wall_jiffies;
@@ -660,7 +661,7 @@ static inline void update_times(void)
 		update_wall_time(ticks);
 	}
 	vxtime_unlock();
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 
 	calc_load(ticks);
 }
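
For reference, the reader side of the new frlock_t primitive is used the same way in every do_gettimeofday() conversion above: copy the protected values between fr_read_begin() and fr_read_end(), and loop again if the two sequence numbers disagree. The sketch below is illustrative only and is not part of the patch; the fr_* calls match include/linux/frlock.h as added above, but demo_lock, val_a/val_b and the two helper functions are made-up names for the example.

	#include <linux/frlock.h>

	static frlock_t demo_lock = FR_LOCK_UNLOCKED;	/* hypothetical example lock */
	static unsigned long val_a, val_b;		/* hypothetical protected data */

	/* Writer side: serialized against other writers by the internal
	 * spinlock, bracketed by the pre/post sequence increments. */
	static void demo_update(unsigned long a, unsigned long b)
	{
		fr_write_lock(&demo_lock);
		val_a = a;
		val_b = b;
		fr_write_unlock(&demo_lock);
	}

	/* Reader side: never blocks; retries if a writer ran (or was still
	 * running) while the values were being copied. */
	static void demo_read(unsigned long *a, unsigned long *b)
	{
		unsigned seq;

		do {
			seq = fr_read_begin(&demo_lock);
			*a = val_a;
			*b = val_b;
		} while (seq != fr_read_end(&demo_lock));
	}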