From 5eb6d20533d14a432df714520939a6181e28f099 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 26 Jun 2006 00:25:07 -0700
Subject: [PATCH] Time: Use clocksource abstraction for NTP adjustments

Instead of incrementing xtime by tick_nsec + ntp adjustments, use the
clocksource abstraction to increment and scale time.  Using the clocksource
abstraction allows other clocksources to be used consistently in the face of
late or lost ticks, while preserving the existing behavior via the jiffies
clocksource.

This removes the need to keep time_phase adjustments as we just use the
current_tick_length() function as the NTP interface and accumulate time using
shifted nanoseconds.

The basics of this design was by Roman Zippel, however it is my own
interpretation and implementation, so the credit should go to him and the
blame to me.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/clocksource.h | 97 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/timer.c              | 47 +++++++++++++---------
 2 files changed, 125 insertions(+), 19 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c4187cda0ee4d..c4739c4e30399 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -173,6 +173,103 @@ static inline void calculate_clocksource_interval(struct clocksource *c,
 	c->interval_snsecs = (u64)c->interval_cycles * c->mult;
 }
 
+
+/**
+ * error_aproximation - calculates an error adjustment for a given error
+ *
+ * @error:	Error value (unsigned)
+ * @unit:	Adjustment unit
+ *
+ * For a given error value, this function takes the adjustment unit
+ * and uses binary approximation to return a power of two adjustment value.
+ *
+ * This function is only for use by the the make_ntp_adj() function
+ * and you must hold a write on the xtime_lock when calling.
+ */
+static inline int error_aproximation(u64 error, u64 unit)
+{
+	static int saved_adj = 0;
+	u64 adjusted_unit = unit << saved_adj;
+
+	if (error > (adjusted_unit * 2)) {
+		/* large error, so increment the adjustment factor */
+		saved_adj++;
+	} else if (error > adjusted_unit) {
+		/* just right, don't touch it */
+	} else if (saved_adj) {
+		/* small error, so drop the adjustment factor */
+		saved_adj--;
+		return 0;
+	}
+
+	return saved_adj;
+}
+
+
+/**
+ * make_ntp_adj - Adjusts the specified clocksource for a given error
+ *
+ * @clock:		Pointer to clock to be adjusted
+ * @cycles_delta:	Current unacounted cycle delta
+ * @error:		Pointer to current error value
+ *
+ * Returns clock shifted nanosecond adjustment to be applied against
+ * the accumulated time value (ie: xtime).
+ *
+ * If the error value is large enough, this function calulates the
+ * (power of two) adjustment value, and adjusts the clock's mult and
+ * interval_snsecs values accordingly.
+ *
+ * However, since there may be some unaccumulated cycles, to avoid
+ * time inconsistencies we must adjust the accumulation value
+ * accordingly.
+ *
+ * This is not very intuitive, so the following proof should help:
+ * The basic timeofday algorithm:  base + cycle * mult
+ * Thus:
+ *    new_base + cycle * new_mult = old_base + cycle * old_mult
+ *    new_base = old_base + cycle * old_mult - cycle * new_mult
+ *    new_base = old_base + cycle * (old_mult - new_mult)
+ *    new_base - old_base = cycle * (old_mult - new_mult)
+ *    base_delta = cycle * (old_mult - new_mult)
+ *    base_delta = cycle * (mult_delta)
+ *
+ * Where mult_delta is the adjustment value made to mult
+ *
+ */
+static inline s64 make_ntp_adj(struct clocksource *clock,
+				cycles_t cycles_delta, s64* error)
+{
+	s64 ret = 0;
+	if (*error  > ((s64)clock->interval_cycles+1)/2) {
+		/* calculate adjustment value */
+		int adjustment = error_aproximation(*error,
+						clock->interval_cycles);
+		/* adjust clock */
+		clock->mult += 1 << adjustment;
+		clock->interval_snsecs += clock->interval_cycles << adjustment;
+
+		/* adjust the base and error for the adjustment */
+		ret =  -(cycles_delta << adjustment);
+		*error -= clock->interval_cycles << adjustment;
+		/* XXX adj error for cycle_delta offset? */
+	} else if ((-(*error))  > ((s64)clock->interval_cycles+1)/2) {
+		/* calculate adjustment value */
+		int adjustment = error_aproximation(-(*error),
+						clock->interval_cycles);
+		/* adjust clock */
+		clock->mult -= 1 << adjustment;
+		clock->interval_snsecs -= clock->interval_cycles << adjustment;
+
+		/* adjust the base and error for the adjustment */
+		ret =  cycles_delta << adjustment;
+		*error += clock->interval_cycles << adjustment;
+		/* XXX adj error for cycle_delta offset? */
+	}
+	return ret;
+}
+
+
 /* used to install a new clocksource */
 int register_clocksource(struct clocksource*);
 void reselect_clocksource(void);
diff --git a/kernel/timer.c b/kernel/timer.c
index 623f9ea198d8d..6811436a031dd 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -597,7 +597,6 @@ long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
 long time_precision = 1;		/* clock precision (us)		*/
 long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
 long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
-static long time_phase;			/* phase offset (scaled us)	*/
 long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
 					/* frequency offset (scaled ppm)*/
 static long time_adj;			/* tick adjust (scaled 1 / HZ)	*/
@@ -747,27 +746,14 @@ static long adjtime_adjustment(void)
 }
 
 /* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+static void update_ntp_one_tick(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
 	time_adjust_step = adjtime_adjustment();
 	if (time_adjust_step)
 		/* Reduce by this step the amount of time left  */
 		time_adjust -= time_adjust_step;
-	delta_nsec = tick_nsec + time_adjust_step * 1000;
-	/*
-	 * Advance the phase, once it gets to one microsecond, then
-	 * advance the tick more.
-	 */
-	time_phase += time_adj;
-	if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
-		long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
-		time_phase -= ltemp << (SHIFT_SCALE - 10);
-		delta_nsec += ltemp;
-	}
-	xtime.tv_nsec += delta_nsec;
-	time_interpolator_update(delta_nsec);
 
 	/* Changes by adjtime() do not take effect till next tick. */
 	if (time_next_adjust != 0) {
@@ -872,8 +858,13 @@ device_initcall(timekeeping_init_device);
  */
 static void update_wall_time(void)
 {
+	static s64 remainder_snsecs, error;
+	s64 snsecs_per_sec;
 	cycle_t now, offset;
 
+	snsecs_per_sec = (s64)NSEC_PER_SEC << clock->shift;
+	remainder_snsecs += (s64)xtime.tv_nsec << clock->shift;
+
 	now = read_clocksource(clock);
 	offset = (now - last_clock_cycle)&clock->mask;
 
@@ -881,17 +872,35 @@ static void update_wall_time(void)
 	 * case of lost or late ticks, it will accumulate correctly.
 	 */
 	while (offset > clock->interval_cycles) {
+		/* get the ntp interval in clock shifted nanoseconds */
+		s64 ntp_snsecs	= current_tick_length(clock->shift);
+
 		/* accumulate one interval */
+		remainder_snsecs += clock->interval_snsecs;
 		last_clock_cycle += clock->interval_cycles;
 		offset -= clock->interval_cycles;
 
-		update_wall_time_one_tick();
-		if (xtime.tv_nsec >= 1000000000) {
-			xtime.tv_nsec -= 1000000000;
+		/* interpolator bits */
+		time_interpolator_update(clock->interval_snsecs
+						>> clock->shift);
+		/* increment the NTP state machine */
+		update_ntp_one_tick();
+
+		/* accumulate error between NTP and clock interval */
+		error += (ntp_snsecs - (s64)clock->interval_snsecs);
+
+		/* correct the clock when NTP error is too big */
+		remainder_snsecs += make_ntp_adj(clock, offset, &error);
+
+		if (remainder_snsecs >= snsecs_per_sec) {
+			remainder_snsecs -= snsecs_per_sec;
 			xtime.tv_sec++;
 			second_overflow();
 		}
 	}
+	/* store full nanoseconds into xtime */
+	xtime.tv_nsec = remainder_snsecs >> clock->shift;
+	remainder_snsecs -= (s64)xtime.tv_nsec << clock->shift;
 }
 
 /*
-- 
cgit 1.2.3-korg