aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2004-08-22 23:00:13 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2004-08-22 23:00:13 -0700
commitf3996c8cf3ff3595fa3c60efef8c07901b380b54 (patch)
tree2a7bf6765883d10496cec1214200ad5e69502244 /include
parentdb205bd6b845f6fdd4a1e1984371e25af9c62a13 (diff)
downloadhistory-f3996c8cf3ff3595fa3c60efef8c07901b380b54.tar.gz
[PATCH] Concurrent O_SYNC write support
In databases it is common to have multiple threads or processes performing O_SYNC writes against different parts of the same file. Our performance at this is poor, because each writer blocks access to the file by waiting on I/O completion while holding i_sem: everything is serialised. The patch improves things by moving the writing and waiting outside i_sem. So other threads can get in and submit their I/O and permit the disk scheduler to optimise the IO patterns better. Also, the O_SYNC writer only writes and waits on the pages which he wrote, rather than writing and waiting on all dirty pages in the file. The reason we haven't been able to do this before is that the required walk of the address_space page lists is easily livelockable without the i_sem serialisation. But in this patch we perform the waiting via a radix-tree walk of the affected pages. This cannot be livelocked. The sync of the inode's metadata is still performed inside i_sem. This is because it is list-based and is hence still livelockable. However it is usually the case that databases are overwriting existing file blocks and there will be no dirty buffers attached to the address_space anyway. The code is careful to ensure that the IO for the pages and the IO for the metadata are nonblockingly scheduled at the same time. This is an improvement over the current code, which will issue two separate write-and-wait cycles: one for metadata, one for pages. Note from Suparna: Reworked to use the tagged radix-tree based writeback infrastructure. Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/writeback.h2
3 files changed, 7 insertions, 6 deletions
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 59ed4d09a9db61..f22efded164a5e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -206,12 +206,6 @@ int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
int nobh_truncate_page(struct address_space *, loff_t);
-#define OSYNC_METADATA (1<<0)
-#define OSYNC_DATA (1<<1)
-#define OSYNC_INODE (1<<2)
-int generic_osync_inode(struct inode *, struct address_space *, int);
-
-
/*
* inline definitions
*/
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b44be6f27c81af..1945d75d77f73e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -827,6 +827,11 @@ extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct de
#define DT_SOCK 12
#define DT_WHT 14
+#define OSYNC_METADATA (1<<0)
+#define OSYNC_DATA (1<<1)
+#define OSYNC_INODE (1<<2)
+int generic_osync_inode(struct inode *, struct address_space *, int);
+
/*
* This is the "filldir" function type, used by readdir() to let
* the kernel specify what kind of dirent layout it wants to have.
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 48d95e59230b7e..42157f942fa31c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -103,6 +103,8 @@ void page_writeback_init(void);
void balance_dirty_pages_ratelimited(struct address_space *mapping);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
+int sync_page_range(struct inode *inode, struct address_space *mapping,
+ loff_t pos, size_t count);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl