For some filesystems (ext3, reiserfs at least), ->dirty_inode() is very expensive. The kernel currently calls mark_inode_dirty() up to 1000 times/sec/inode. But there is no need to do this if the filesystem cannot store high-resolution times on disk. This patch restores the optimisation of only dirtying the filesystem inode when its on-disk representation has actually changed. A filesystem sets the MS_ONE_SECOND flag in sb->s_flags to indicate that it wants this treatment. With this patch and the next one (which implements this for ext2 & ext3): Time to write a 1MB file with 1,000,000 1-byte-write()s: Before: ext2: 0.40s user 2.26s system 2.687 total After: ext2: 0.40s user 2.67s system 3.016 total (huh?) ext3: 0.57s user 18.12s system 18.632 total Time to overwrite a 1MB file with 1,000,000 1-byte-write()s: ext2: 0.41s user 2.39s system 2.783 total ext3: 0.49s user 9.14s system 9.602 total Time to read a 1MB file with 1,000,000 1-byte-read()s: ext2: 0.45s user 1.56s system 2.007 total ext3: 0.43s user 1.65s system 2.088 total fs/inode.c | 48 +++++++++++++++++++++++++++++++++++------------- include/linux/fs.h | 2 ++ 2 files changed, 37 insertions(+), 13 deletions(-) diff -puN fs/inode.c~update_atime-ng fs/inode.c --- 25/fs/inode.c~update_atime-ng 2003-03-07 02:57:21.000000000 -0800 +++ 25-akpm/fs/inode.c 2003-03-07 02:57:21.000000000 -0800 @@ -1077,6 +1077,19 @@ sector_t bmap(struct inode * inode, sect return res; } +/* + * Return true if the filesystem which backs this inode considers the two + * passed timespecs to be sufficiently different to warrant flushing the + * altered time out to disk. 
+ */ +static int inode_times_differ(struct inode *inode, + struct timespec *old, struct timespec *new) +{ + if (IS_ONE_SECOND(inode)) + return old->tv_sec != new->tv_sec; + return !timespec_equal(old, new); +} + /** * update_atime - update the access time * @inode: inode accessed @@ -1088,19 +1101,23 @@ sector_t bmap(struct inode * inode, sect void update_atime(struct inode *inode) { - struct timespec now = CURRENT_TIME; + struct timespec now; - /* Can later do this more lazily with a per superblock interval */ - if (timespec_equal(&inode->i_atime, &now)) - return; if (IS_NOATIME(inode)) return; if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) return; if (IS_RDONLY(inode)) return; - inode->i_atime = now; - mark_inode_dirty_sync(inode); + + now = current_kernel_time(); + if (inode_times_differ(inode, &inode->i_atime, &now)) { + inode->i_atime = now; + mark_inode_dirty_sync(inode); + } else { + if (!timespec_equal(&inode->i_atime, &now)) + inode->i_atime = now; + } } /** @@ -1109,20 +1126,25 @@ void update_atime(struct inode *inode) * @ctime_too: update ctime too * * Update the mtime time on an inode and mark it for writeback. - * This function automatically handles read only file systems and media. * When ctime_too is specified update the ctime too. 
*/ void inode_update_time(struct inode *inode, int ctime_too) { - struct timespec now = CURRENT_TIME; - if (timespec_equal(&inode->i_mtime, &now) && - !(ctime_too && !timespec_equal(&inode->i_ctime, &now))) - return; + struct timespec now = current_kernel_time(); + int sync_it = 0; + + if (inode_times_differ(inode, &inode->i_mtime, &now)) + sync_it = 1; inode->i_mtime = now; - if (ctime_too) + + if (ctime_too) { + if (inode_times_differ(inode, &inode->i_ctime, &now)) + sync_it = 1; inode->i_ctime = now; - mark_inode_dirty_sync(inode); + } + if (sync_it) + mark_inode_dirty_sync(inode); } EXPORT_SYMBOL(inode_update_time); diff -puN include/linux/fs.h~update_atime-ng include/linux/fs.h --- 25/include/linux/fs.h~update_atime-ng 2003-03-07 02:57:21.000000000 -0800 +++ 25-akpm/include/linux/fs.h 2003-03-07 02:57:21.000000000 -0800 @@ -110,6 +110,7 @@ extern int leases_enable, dir_notify_ena #define MS_REC 16384 #define MS_VERBOSE 32768 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -165,6 +166,7 @@ extern int leases_enable, dir_notify_ena #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) _