MS_ASYNC will currently wait on previously-submitted I/O, then start new I/O and not wait on it. This can cause undesirable blocking if msync is called rapidly against the same memory. So instead, change msync(MS_ASYNC) to not start any IO at all. Just flush the pte dirty bits into the pageframe and leave it at that. The IO _will_ happen within a kupdate period. And the application can use fsync() or fadvise(FADV_DONTNEED) if it actually wants to schedule the IO immediately. 25-akpm/mm/msync.c | 30 +++++++++++++++--------------- 1 files changed, 15 insertions(+), 15 deletions(-) diff -puN mm/msync.c~MS_ASYNC-more-async mm/msync.c --- 25/mm/msync.c~MS_ASYNC-more-async Mon Apr 7 13:13:37 2003 +++ 25-akpm/mm/msync.c Mon Apr 7 13:17:13 2003 @@ -125,11 +125,13 @@ static int filemap_sync(struct vm_area_s /* * MS_SYNC syncs the entire file - including mappings. * - * MS_ASYNC initiates writeout of just the dirty mapped data. - * This provides no guarantee of file integrity - things like indirect - * blocks may not have started writeout. MS_ASYNC is primarily useful - * where the application knows that it has finished with the data and - * wishes to intelligently schedule its own I/O traffic. + * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just + * marks the relevant pages dirty. The application may now run fsync() to + * write out the dirty pages and wait on the writeout and check the result. + * Or the application may run fadvise(FADV_DONTNEED) against the fd to start + * async writeout immediately. + * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to + * applications. 
*/ static int msync_interval(struct vm_area_struct * vma, unsigned long start, unsigned long end, int flags) @@ -143,22 +145,20 @@ static int msync_interval(struct vm_area if (file && (vma->vm_flags & VM_SHARED)) { ret = filemap_sync(vma, start, end-start, flags); - if (!ret && (flags & (MS_SYNC|MS_ASYNC))) { - struct inode * inode = file->f_dentry->d_inode; + if (!ret && (flags & MS_SYNC)) { + struct inode *inode = file->f_dentry->d_inode; int err; down(&inode->i_sem); ret = filemap_fdatawrite(inode->i_mapping); - if (flags & MS_SYNC) { - if (file->f_op && file->f_op->fsync) { - err = file->f_op->fsync(file, file->f_dentry, 1); - if (err && !ret) - ret = err; - } - err = filemap_fdatawait(inode->i_mapping); - if (!ret) + if (file->f_op && file->f_op->fsync) { + err = file->f_op->fsync(file,file->f_dentry,1); + if (err && !ret) ret = err; } + err = filemap_fdatawait(inode->i_mapping); + if (!ret) + ret = err; up(&inode->i_sem); } } _