From: Mathieu Desnoyers I just found out that some precision is unnecessarily lost in the arch/i386/kernel/timers/timer_tsc.c:set_cyc2ns_scale function. It uses a cpu_mhz parameter when it could use a cpu_khz. In the specific case of an Intel P4 running at 3001.171 Mhz, the truncation to 3001 Mhz leads to an imprecision of 19 microseconds per second : this is very sad for a timer with nearly nanosecond accuracy. Fix the x86_64 architecture too. Cc: george anzinger Cc: john stultz Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/timers/timer_hpet.c | 17 +++++++++++------ arch/i386/kernel/timers/timer_tsc.c | 21 +++++++++++++-------- arch/x86_64/kernel/time.c | 8 ++++---- 3 files changed, 28 insertions(+), 18 deletions(-) diff -puN arch/i386/kernel/timers/timer_hpet.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision arch/i386/kernel/timers/timer_hpet.c --- devel/arch/i386/kernel/timers/timer_hpet.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision 2005-09-13 17:57:27.000000000 -0700 +++ devel-akpm/arch/i386/kernel/timers/timer_hpet.c 2005-09-13 17:57:27.000000000 -0700 @@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOC * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -163,7 +168,7 @@ static int __init init_hpet(char* overri printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } /* set this only when cpu_has_tsc */ timer_hpet.read_timer = read_timer_tsc; diff -puN arch/i386/kernel/timers/timer_tsc.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision arch/i386/kernel/timers/timer_tsc.c --- devel/arch/i386/kernel/timers/timer_tsc.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision 2005-09-13 17:57:27.000000000 -0700 +++ devel-akpm/arch/i386/kernel/timers/timer_tsc.c 2005-09-13 17:57:27.000000000 -0700 @@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOC * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_bl if (use_tsc) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } } #endif @@ -536,7 +541,7 @@ static int __init init_tsc(char* overrid printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); return 0; } } diff -puN arch/x86_64/kernel/time.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision arch/x86_64/kernel/time.c --- devel/arch/x86_64/kernel/time.c~i386-and-x86_64-tsc-set_cyc2ns_scale-imprecision 2005-09-13 17:57:27.000000000 -0700 +++ devel-akpm/arch/x86_64/kernel/time.c 2005-09-13 17:57:27.000000000 -0700 @@ -486,9 +486,9 @@ static irqreturn_t timer_interrupt(int i static unsigned int cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -660,7 +660,7 @@ static int time_cpufreq_notifier(struct vxtime.tsc_quot = (1000L << 32) / cpu_khz; } - set_cyc2ns_scale(cpu_khz_ref / 1000); + set_cyc2ns_scale(cpu_khz_ref); return 0; } @@ -944,7 +944,7 @@ void __init time_init(void) rdtscll_sync(&vxtime.last_tsc); setup_irq(0, &irq0); - set_cyc2ns_scale(cpu_khz / 1000); + set_cyc2ns_scale(cpu_khz); #ifndef CONFIG_SMP time_init_gtod(); _