aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2005-01-04 05:30:08 -0800
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-01-04 05:30:08 -0800
commit8ce13b01c190a7e663ea146ba89d1153240884bb (patch)
tree5979ec0f842bf2765c77f90f9628a62b7eb32ca4 /kernel
parent8fa29920db547f6d0e7729533be5ccf0a005b89b (diff)
downloadhistory-8ce13b01c190a7e663ea146ba89d1153240884bb.tar.gz
[PATCH] Sync in-core time granularity with filesystems
This patch corrects a problem that was originally added with the nanosecond timestamps in stat patch. The problem is that some file systems don't have enough space in their on-disk inode to save nanosecond timestamps, so they truncate the c/a/mtime to seconds when flushing a dirty inode. In core, the inode would have full jiffies granularity. This can be observed by programs as a timestamp that jumps backwards under specific loads when an inode is flushed and then reloaded from disk. The problem was already known when the original patch went in, but it wasn't deemed important enough at that time. So far there has been only one report of it causing problems. Now Tridge is worried that it will break running Excel over samba4 because Excel seems to do very anal timestamp checking and samba4 will supply 100ns timestamps over the network. This patch solves it by putting the time resolution into the superblock of a fs and always rounding the in-core timestamps to that granularity. This also supersedes some previous ext2/3 hacks to flush the inode less often when only the subsecond timestamp changes. I tried to keep the overhead low; in particular, it tries to keep divisions out of fast paths as far as possible. The patch is quite big, but 99% of it is just relatively straightforward search'n'replace in a lot of filesystems. Unconverted filesystems will default to a 1ns granularity, but may still show the problem if they continue to use CURRENT_TIME. I converted all in-tree filesystems. One possible future extension of this would be to have two time granularities per superblock - one that specifies the visible resolution, and the other to specify how often timestamps should be flushed to disk, which could be tuned with a mount option per fs (e.g. often m/atimes don't need to be flushed every second). Would be easy to do as an add-on if someone is interested. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/time.c47
1 files changed, 46 insertions, 1 deletions
diff --git a/kernel/time.c b/kernel/time.c
index 5ceab525f203f0..d5400f6af052db 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -33,6 +33,7 @@
#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/security.h>
+#include <linux/fs.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -415,7 +416,7 @@ asmlinkage long sys_adjtimex(struct timex __user *txc_p)
return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}
-struct timespec current_kernel_time(void)
+inline struct timespec current_kernel_time(void)
{
struct timespec now;
unsigned long seq;
@@ -431,6 +432,50 @@ struct timespec current_kernel_time(void)
EXPORT_SYMBOL(current_kernel_time);
+/**
+ * current_fs_time - Return the current time at a filesystem's granularity
+ * @sb: Superblock; its s_time_gran field supplies the granularity.
+ *
+ * Return current_kernel_time() truncated by timespec_trunc() to the time
+ * granularity recorded in @sb (sb->s_time_gran).
+ */
+struct timespec current_fs_time(struct super_block *sb)
+{
+ struct timespec now = current_kernel_time();
+ return timespec_trunc(now, sb->s_time_gran);
+}
+EXPORT_SYMBOL(current_fs_time);
+
+/**
+ * timespec_trunc - Truncate timespec to a granularity
+ * @t: Timespec
+ * @gran: Granularity in ns.
+ *
+ * Truncate a timespec to a granularity. gran must be at most one second
+ * (1000000000 ns, which clears tv_nsec). Always rounds down.
+ *
+ * This function should be only used for timestamps returned by
+ * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
+ * it doesn't handle the better resolution of the latter.
+ */
+struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+ /*
+ * Division is pretty slow so avoid it for common cases.
+ * Currently current_kernel_time() never returns better than
+ * jiffies resolution. Exploit that.
+ */
+ if (gran <= jiffies_to_usecs(1) * 1000) {
+ /* gran is no coarser than one jiffy (in ns): leave t unchanged */
+ } else if (gran == 1000000000) {
+ t.tv_nsec = 0;
+ } else {
+ t.tv_nsec -= t.tv_nsec % gran;
+ }
+ return t;
+}
+EXPORT_SYMBOL(timespec_trunc);
+
#ifdef CONFIG_TIME_INTERPOLATION
void getnstimeofday (struct timespec *tv)
{