From: john stultz The patch below is my new acpi-pm-timer-fixes_A5 patch, which I want to sync w/ Andrew if no one sees anything too critically stupid in it. Boots and works on the systems I've tested (which unfortunately don't seem to have the quirk we're fixing). YMMV. It's against 2.6.1-rc2 + Dominik's acpi-pm-timer patch (found in the -mm tree). Changes since A4 (acpi-pm-timer-fixes-2 in -mm) are: o multi-read workaround for PIIX4 and other troublesome chipsets o whitespace cleanups (from Dmitry Torokhov) o merged my verbose-timesource-acpi-pm patch in (as verbose-timesource is now in mainline) o use jiffies_64 instead of jiffies to avoid overflow trouble 25-akpm/Documentation/kernel-parameters.txt | 2 25-akpm/arch/i386/Kconfig | 18 --- 25-akpm/arch/i386/kernel/acpi/boot.c | 10 + 25-akpm/arch/i386/kernel/timers/common.c | 20 +++ 25-akpm/arch/i386/kernel/timers/timer_cyclone.c | 21 --- 25-akpm/arch/i386/kernel/timers/timer_pm.c | 137 +++++++++++++++++++----- 25-akpm/drivers/acpi/Kconfig | 18 +++ 25-akpm/include/asm-i386/timer.h | 1 8 files changed, 158 insertions(+), 69 deletions(-) diff -puN arch/i386/Kconfig~acpi-timer-fixes-A5 arch/i386/Kconfig --- 25/arch/i386/Kconfig~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/arch/i386/Kconfig Tue Jan 6 13:53:56 2004 @@ -416,24 +416,6 @@ config HPET_TIMER config HPET_EMULATE_RTC def_bool HPET_TIMER && RTC=y -config X86_PM_TIMER - bool "Power Management Timer Support" - depends on (!X86_VISWS && !X86_VISWS) && EXPERIMENTAL - default n - help - The Power Management Timer is available on all ACPI-capable, - in most cases even if ACPI is unusable or blacklisted. - - This timing source is not affected by powermanagement features - like aggressive processor idling, throttling, frequency and/or - voltage scaling, unlike the commonly used Time Stamp Counter - (TSC) timing source. - - So, if you see messages like 'Losing too many ticks!' 
in the - kernel logs, and/or you are using a this on a notebook which - does not yet have an HPET (see above), you should say "Y" - here. Otherwise, say "N". - config SMP bool "Symmetric multi-processing support" ---help--- diff -puN arch/i386/kernel/acpi/boot.c~acpi-timer-fixes-A5 arch/i386/kernel/acpi/boot.c --- 25/arch/i386/kernel/acpi/boot.c~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/arch/i386/kernel/acpi/boot.c Tue Jan 6 13:53:56 2004 @@ -363,15 +363,19 @@ static int __init acpi_parse_hpet(unsign } #endif -/* detect the location of the ACPI PM Timer +/* detect the location of the ACPI PM Timer */ #ifdef CONFIG_X86_PM_TIMER extern u32 pmtmr_ioport; static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { - struct fadt_descriptor_rev2 *fadt; + struct fadt_descriptor_rev2 *fadt = NULL; - fadt = __va(phys); + fadt = (struct fadt_descriptor_rev2 *) __acpi_map_table(phys, size); + if (!fadt) { + printk(KERN_WARNING PREFIX "Unable to map FADT\n"); + return 0; + } if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ diff -puN arch/i386/kernel/timers/common.c~acpi-timer-fixes-A5 arch/i386/kernel/timers/common.c --- 25/arch/i386/kernel/timers/common.c~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/arch/i386/kernel/timers/common.c Tue Jan 6 13:53:56 2004 @@ -137,3 +137,23 @@ bad_calibration: } #endif +/* calculate cpu_khz */ +void __init init_cpu_khz(void) +{ + if (cpu_has_tsc) { + unsigned long tsc_quotient = calibrate_tsc(); + if (tsc_quotient) { + /* report CPU clock rate in kHz. + * The formula is (10^3 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clocks/ms (kHz). Our precision is about 100 ppm. 
+ */ + { unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (tsc_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + } + } + } +} diff -puN arch/i386/kernel/timers/timer_cyclone.c~acpi-timer-fixes-A5 arch/i386/kernel/timers/timer_cyclone.c --- 25/arch/i386/kernel/timers/timer_cyclone.c~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/arch/i386/kernel/timers/timer_cyclone.c Tue Jan 6 13:53:56 2004 @@ -212,26 +212,7 @@ static int __init init_cyclone(char* ove } } - /* init cpu_khz. - * XXX - This should really be done elsewhere, - * and in a more generic fashion. -johnstul@us.ibm.com - */ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - } - } - } + init_cpu_khz(); /* Everything looks good! 
*/ return 0; diff -puN arch/i386/kernel/timers/timer_pm.c~acpi-timer-fixes-A5 arch/i386/kernel/timers/timer_pm.c --- 25/arch/i386/kernel/timers/timer_pm.c~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/arch/i386/kernel/timers/timer_pm.c Tue Jan 6 13:53:56 2004 @@ -30,7 +30,37 @@ u32 pmtmr_ioport = 0; /* value of the Power timer at last timer interrupt */ static u32 offset_tick; +static u32 offset_delay; +static unsigned long long monotonic_base; +static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +/* helper function to safely read the ACPI PM timesource */ +static inline u32 read_pmtmr(void) +{ + u32 v1 = 0, v2 = 0, v3 = 0; + /* It has been reported that on various broken chipsets + * (ICH4, PIIX4 and PIIX4E) the ACPI PM time source is not + * latched, so it must be read multiple times to ensure + * that a safe value is read. + */ + int dbg_failed = 0; + do { + if (dbg_failed) + printk("bad pmtmr read: (%u, %u, %u)\n", + v1, v2, v3); + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + dbg_failed = 1; + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; +} static int init_pmtmr(char* override) { @@ -44,42 +74,105 @@ static int init_pmtmr(char* override) return -ENODEV; /* "verify" this timing source */ - value1 = inl(pmtmr_ioport); - value1 &= 0xFFFFFF; - for (i=0; i < 10000; i++) { - value2 = inl(pmtmr_ioport); - value2 &= 0xFFFFFF; + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); if (value2 == value1) continue; if (value2 > value1) - return 0; + goto pm_good; if ((value2 < value1) && ((value2) < 0xFFF)) - return 0; + goto pm_good; printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); return -EINVAL; } printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); return -ENODEV; + 
+pm_good: + init_cpu_khz(); + return 0; } +static inline u32 cyc2us(u32 cycles) +{ + /* The Power Management Timer ticks at 3.579545 ticks per microsecond. + * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] + * + * Callers may pass any 24-bit delta (up to 0xFFFFFF ticks), and + * 0xFFFFFF * 286 does not fit in a u32, so the multiply is done + * in 64 bits before shifting back down. + */ + u64 scaled = (u64)cycles * 286; + return (u32)(scaled >> 10); +} /* * this gets called during each timer interrupt + * - Called while holding the writer xtime_lock */ static void mark_offset_pmtmr(void) { - offset_tick = inl(pmtmr_ioport); - offset_tick &= 0xFFFFFF; /* limit it to 24 bits */ - return; + u32 lost, delta, last_offset; + static int first_run = 1; + last_offset = offset_tick; + + write_seqlock(&monotonic_lock); + + offset_tick = read_pmtmr(); + + /* calculate tick interval */ + delta = (offset_tick - last_offset) & ACPI_PM_MASK; + + /* convert to usecs */ + delta = cyc2us(delta); + + /* update the monotonic base value */ + monotonic_base += (unsigned long long)delta * NSEC_PER_USEC; + write_sequnlock(&monotonic_lock); + + /* convert to ticks */ + delta += offset_delay; + lost = delta / (USEC_PER_SEC / HZ); + offset_delay = delta % (USEC_PER_SEC / HZ); + + + /* compensate for lost ticks */ + if (lost >= 2) + jiffies_64 += lost - 1; + + /* don't calculate delay for first run, + or if we've got less than a tick */ + if (first_run || (lost < 1)) { + first_run = 0; + offset_delay = 0; + } } static unsigned long long monotonic_clock_pmtmr(void) { - return 0; + u32 last_offset, this_offset; + unsigned long long base, ret; + unsigned seq; + + + /* atomically read monotonic base & last_offset */ + do { + seq = read_seqbegin(&monotonic_lock); + last_offset = offset_tick; + base = monotonic_base; + } while (read_seqretry(&monotonic_lock, seq)); + + /* Read the pmtmr */ + this_offset = read_pmtmr(); + + /* convert to nanoseconds */ + ret = (this_offset - last_offset) & ACPI_PM_MASK; + ret = base + ((unsigned long long)cyc2us(ret) * 
NSEC_PER_USEC); + return ret; } - /* * copied from delay_pit */ @@ -99,33 +192,23 @@ static void delay_pmtmr(unsigned long lo /* * get the offset (in microseconds) from the last call to mark_offset() + * - Called holding a reader xtime_lock */ static unsigned long get_offset_pmtmr(void) { u32 now, offset, delta = 0; offset = offset_tick; - now = inl(pmtmr_ioport); - now &= 0xFFFFFF; - if (likely(offset < now)) - delta = now - offset; - else if (offset > now) - delta = (0xFFFFFF - offset) + now; + now = read_pmtmr(); + delta = (now - offset) & ACPI_PM_MASK; - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. - */ - delta *= 286; - return (unsigned long) (delta >> 10); + return (unsigned long) offset_delay + cyc2us(delta); } /* acpi timer_opts struct */ struct timer_opts timer_pmtmr = { + .name = "pmtmr", .init = init_pmtmr, .mark_offset = mark_offset_pmtmr, .get_offset = get_offset_pmtmr, diff -puN Documentation/kernel-parameters.txt~acpi-timer-fixes-A5 Documentation/kernel-parameters.txt --- 25/Documentation/kernel-parameters.txt~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/Documentation/kernel-parameters.txt Tue Jan 6 13:53:56 2004 @@ -237,7 +237,7 @@ running once the system is up. Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. - Format: { pit | tsc | cyclone | ... } + Format: { pit | tsc | cyclone | pmtmr } hpet= [IA-32,HPET] option to disable HPET and use PIT. 
Format: disable diff -puN drivers/acpi/Kconfig~acpi-timer-fixes-A5 drivers/acpi/Kconfig --- 25/drivers/acpi/Kconfig~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/drivers/acpi/Kconfig Tue Jan 6 13:53:56 2004 @@ -263,5 +263,23 @@ config ACPI_RELAXED_AML particular, many Toshiba laptops require this for correct operation of the AC module. +config X86_PM_TIMER + bool "Power Management Timer Support" + depends on X86 && ACPI + depends on ACPI_BOOT && EXPERIMENTAL + default n + help + The Power Management Timer is available on all ACPI-capable systems, + in most cases even if ACPI is unusable or blacklisted. + + This timing source is not affected by power management features + like aggressive processor idling, throttling, frequency and/or + voltage scaling, unlike the commonly used Time Stamp Counter + (TSC) timing source. + + So, if you see messages like 'Losing too many ticks!' in the + kernel logs, and/or you are using this on a notebook which + does not yet have an HPET, you should say "Y" here. + endmenu diff -puN include/asm-i386/timer.h~acpi-timer-fixes-A5 include/asm-i386/timer.h --- 25/include/asm-i386/timer.h~acpi-timer-fixes-A5 Tue Jan 6 13:53:56 2004 +++ 25-akpm/include/asm-i386/timer.h Tue Jan 6 13:53:56 2004 @@ -40,6 +40,7 @@ extern struct timer_opts timer_cyclone; #endif extern unsigned long calibrate_tsc(void); +extern void init_cpu_khz(void); #ifdef CONFIG_HPET_TIMER extern struct timer_opts timer_hpet; extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr); _