diff options
author | Andrew Morton <akpm@osdl.org> | 2003-07-06 05:41:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@home.osdl.org> | 2003-07-06 05:41:12 -0700 |
commit | 6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be (patch) | |
tree | da41f4772ecbdecc4e1ca9396478398c3ce159cf /fs/ext3/xattr.c | |
parent | 430cab6d8dc4b79ad691c1d50d7ef217777e2593 (diff) | |
download | history-6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be.tar.gz |
[PATCH] xattr: fine-grained locking
From: Andreas Gruenbacher <agruen@suse.de>
This patch removes the dependency on i_sem in the getxattr and
listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr
semaphores go away. Instead of i_sem and the global semaphore, mutual
exclusion is now ensured by per-inode xattr semaphores, and by locking
the buffers before modifying them. The detailed locking strategy is
described in comments in fs/ext[23]/xattr.c.
Due to this change it is no longer necessary to take i_sem in
ext[23]_permission() for retrieving acls, so the
ext[23]_permission_locked() functions go away.
Additionally, the patch fixes a race condition in ext[23]_permission:
Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no
longer is such protection. Instead, inode->i_acl (and inode->i_default_acl)
are now accessed under inode->i_lock. (This could be replaced by RCU in
the future.)
In the ext3 extended attribute code, an new uglines results from locking
at the buffer head level: The buffer lock must be held between testing
if an xattr block can be modified and the actual modification to prevent
races from happening. Before a block can be modified,
ext3_journal_get_write_access() must be called. But this requies an unlocked
buffer, so I call ext3_journal_get_write_access() before locking the
buffer. If it turns out that the buffer cannot be modified,
journal_release_buffer() is called. Calling ext3_journal_get_write_access
after the test but while the buffer is still locked would be much better.
Diffstat (limited to 'fs/ext3/xattr.c')
-rw-r--r-- | fs/ext3/xattr.c | 201 |
1 files changed, 111 insertions, 90 deletions
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index b89f8be46f0f7d..6fbda077bdbe9b 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -43,13 +43,12 @@ * * Locking strategy * ---------------- - * The VFS holdsinode->i_sem semaphore when any of the xattr inode - * operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext3_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. */ #include <linux/init.h> @@ -59,7 +58,7 @@ #include <linux/ext3_fs.h> #include <linux/mbcache.h> #include <linux/quotaops.h> -#include <asm/semaphore.h> +#include <linux/rwsem.h> #include "xattr.h" #include "acl.h" @@ -93,22 +92,14 @@ static int ext3_xattr_set_handle2(handle_t *, struct inode *, struct ext3_xattr_header *); static int ext3_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext3_xattr_cache_find(struct inode *, - struct ext3_xattr_header *); +static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *, + struct ext3_xattr_header *, + int *); static void ext3_xattr_cache_remove(struct buffer_head *); static void ext3_xattr_rehash(struct ext3_xattr_header *, struct ext3_xattr_entry *); static struct mb_cache *ext3_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext3_xattr_sem semaphore. However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext3_xattr_sem); static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; @@ -191,7 +182,7 @@ ext3_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_getxattr(struct dentry *dentry, const char *name, @@ -199,39 +190,28 @@ ext3_getxattr(struct dentry *dentry, const char *name, { struct ext3_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext3_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext3_xattr_list(dentry->d_inode, buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext3_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_setxattr(struct dentry *dentry, const char *name, @@ -251,7 +231,7 @@ ext3_setxattr(struct dentry *dentry, const char *name, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_removexattr(struct dentry *dentry, const char *name) @@ -290,12 +270,15 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + down_read(&EXT3_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT3_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -360,6 +343,7 @@ found: cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -386,12 +370,15 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT3_I(inode)->xattr_sem); + error = 0; if (!EXT3_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -444,6 +431,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -459,11 +447,12 @@ static void ext3_xattr_update_super_block(handle_t *handle, return; lock_super(sb); - ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - EXT3_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { + EXT3_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + } unlock_super(sb); } @@ -518,8 +507,7 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext3_xattr_sem); - + down_write(&EXT3_I(inode)->xattr_sem); if (EXT3_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); @@ -612,15 +600,28 @@ bad_block: ext3_error(sb, "ext3_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { + int credits = 0; + + /* assert(header == HDR(bh)); */ + if (header->h_refcount != cpu_to_le32(1)) + goto skip_get_write_access; + /* ext3_journal_get_write_access() requires an unlocked bh, + which complicates things here. */ + error = ext3_journal_get_write_access_credits(handle, bh, + &credits); + if (error) + goto cleanup; + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext3_xattr_cache_remove(bh); - error = ext3_journal_get_write_access(handle, bh); - if (error) - goto cleanup; + /* keep the buffer locked while modifying it. */ } else { int offset; + unlock_buffer(bh); + journal_release_buffer(handle, bh, credits); + skip_get_write_access: ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -645,6 +646,8 @@ bad_block: ext3_error(sb, "ext3_xattr_set", last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT3_XATTR_LEN(name_len); @@ -715,9 +718,13 @@ bad_block: ext3_error(sb, "ext3_xattr_set", skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, NULL); } else { ext3_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, header); } @@ -725,7 +732,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext3_xattr_sem); + up_write(&EXT3_I(inode)->xattr_sem); return error; } @@ -740,33 +747,34 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, { struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; - int error; + int credits = 0, error; if (header) { - new_bh = ext3_xattr_cache_find(inode, header); + new_bh = ext3_xattr_cache_find(handle, inode, header, &credits); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; - - error = ext3_journal_get_write_access(handle, new_bh); - if (error) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); + journal_release_buffer(handle, new_bh, credits); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. No need to lock the block as we + * don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext3_xattr_cache_insert(new_bh); @@ -817,15 +825,14 @@ getblk_failed: error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. + * If there was an old block, and we are no longer using it, + * release the old block. */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - error = ext3_journal_get_write_access(handle, old_bh); if (error) goto cleanup; - if (refcount == 1) { + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); @@ -837,12 +844,14 @@ getblk_failed: ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); } else { /* Decrement the refcount only. */ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); ext3_journal_dirty_metadata(handle, old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -886,12 +895,11 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name, void ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT3_I(inode)->xattr_sem); if (!EXT3_I(inode)->i_file_acl) - return; - down(&ext3_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -899,7 +907,6 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) EXT3_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || HDR(bh)->h_blocks != cpu_to_le32(1)) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -907,8 +914,9 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) EXT3_I(inode)->i_file_acl); goto cleanup; } - ext3_journal_get_write_access(handle, bh); - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + if (ext3_journal_get_write_access(handle, bh) != 0) + goto cleanup; + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext3_xattr_cache_remove(bh); ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); @@ -922,11 +930,13 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT3_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext3_xattr_sem); + up_write(&EXT3_I(inode)->xattr_sem); } /* @@ -1022,7 +1032,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, * not found or an error occurred. */ static struct buffer_head * -ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) +ext3_xattr_cache_find(handle_t *handle, struct inode *inode, + struct ext3_xattr_header *header, int *credits) { __u32 hash = le32_to_cpu(header->h_hash); struct mb_cache_entry *ce; @@ -1039,18 +1050,28 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ext3_error(inode->i_sb, "ext3_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT3_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT3_XATTR_REFCOUNT_MAX); - } else if (!ext3_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + /* ext3_journal_get_write_access() requires an unlocked + * bh, which complicates things here. */ + if (ext3_journal_get_write_access_credits(handle, bh, + credits) != 0) + return NULL; + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT3_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT3_XATTR_REFCOUNT_MAX); + } else if (!ext3_xattr_cmp(header, HDR(bh))) { + mb_cache_entry_release(ce); + /* buffer will be unlocked by caller */ + return bh; + } + unlock_buffer(bh); + journal_release_buffer(handle, bh, *credits); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; |