diff options
author | Andrew Morton <akpm@osdl.org> | 2003-07-06 05:41:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@home.osdl.org> | 2003-07-06 05:41:12 -0700 |
commit | 6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be (patch) | |
tree | da41f4772ecbdecc4e1ca9396478398c3ce159cf /fs/ext2/xattr.c | |
parent | 430cab6d8dc4b79ad691c1d50d7ef217777e2593 (diff) | |
download | history-6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be.tar.gz |
[PATCH] xattr: fine-grained locking
From: Andreas Gruenbacher <agruen@suse.de>
This patch removes the dependency on i_sem in the getxattr and
listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr
semaphores go away. Instead of i_sem and the global semaphore, mutual
exclusion is now ensured by per-inode xattr semaphores, and by locking
the buffers before modifying them. The detailed locking strategy is
described in comments in fs/ext[23]/xattr.c.
Due to this change it is no longer necessary to take i_sem in
ext[23]_permission() for retrieving acls, so the
ext[23]_permission_locked() functions go away.
Additionally, the patch fixes a race condition in ext[23]_permission:
Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no
longer is such protection. Instead, inode->i_acl (and inode->i_default_acl)
are now accessed under inode->i_lock. (This could be replaced by RCU in
the future.)
In the ext3 extended attribute code, an new uglines results from locking
at the buffer head level: The buffer lock must be held between testing
if an xattr block can be modified and the actual modification to prevent
races from happening. Before a block can be modified,
ext3_journal_get_write_access() must be called. But this requies an unlocked
buffer, so I call ext3_journal_get_write_access() before locking the
buffer. If it turns out that the buffer cannot be modified,
journal_release_buffer() is called. Calling ext3_journal_get_write_access
after the test but while the buffer is still locked would be much better.
Diffstat (limited to 'fs/ext2/xattr.c')
-rw-r--r-- | fs/ext2/xattr.c | 155 |
1 files changed, 81 insertions, 74 deletions
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index aa29871da68e34..f1334adc62ede5 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -42,13 +42,12 @@ * * Locking strategy * ---------------- - * The VFS already holds the BKL and the inode->i_sem semaphore when any of - * the xattr inode operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext2_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. */ #include <linux/buffer_head.h> @@ -57,7 +56,7 @@ #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> -#include <asm/semaphore.h> +#include <linux/rwsem.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, struct ext2_xattr_entry *); static struct mb_cache *ext2_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext2_xattr_sem semaphore. However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext2_xattr_sem); static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; @@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_getxattr(struct dentry *dentry, const char *name, @@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, const char *name, { struct ext2_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext2_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext2_xattr_list(dentry->d_inode, buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext2_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_setxattr(struct dentry *dentry, const char *name, @@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, const char *name, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_removexattr(struct dentry *dentry, const char *name) @@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + down_read(&EXT2_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT2_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -365,6 +347,7 @@ found: cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT2_I(inode)->xattr_sem); + error = 0; if (!EXT2_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext2_xattr_sem); - + down_write(&EXT2_I(inode)->xattr_sem); if (EXT2_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); @@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { + /* assert(header == HDR(bh)); */ + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext2_xattr_cache_remove(bh); + /* keep the buffer locked while modifying it. */ } else { int offset; + unlock_buffer(bh); ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT2_XATTR_LEN(name_len); @@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_set", skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, NULL); } else { ext2_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, header); } @@ -724,7 +720,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); return error; } @@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, new_bh = ext2_xattr_cache_find(inode, header); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. No need to lock the block as we + don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext2_xattr_cache_insert(new_bh); @@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. - */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - - if (refcount == 1) { + * If there was an old block and we are no longer using it, + * release the old block. + */ + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); @@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, bforget(old_bh); } else { /* Decrement the refcount only. */ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -850,12 +851,11 @@ cleanup: void ext2_xattr_delete_inode(struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) - return; - down(&ext2_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", @@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *inode) EXT2_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext2_xattr_cache_remove(bh); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); @@ -885,11 +885,13 @@ ext2_xattr_delete_inode(struct inode *inode) sync_dirty_buffer(bh); DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT2_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); } /* @@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header *header1, * * Find an identical extended attribute block. * - * Returns a pointer to the block found, or NULL if such a block was - * not found or an error occurred. + * Returns a locked buffer head to the block found, or NULL if such + * a block was not found or an error occurred. */ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) @@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) ext2_error(inode->i_sb, "ext2_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT2_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT2_XATTR_REFCOUNT_MAX); - } else if (!ext2_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT2_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT2_XATTR_REFCOUNT_MAX); + } else if (!ext2_xattr_cmp(header, HDR(bh))) { + ea_bdebug(bh, "b_count=%d", + atomic_read(&(bh->b_count))); + mb_cache_entry_release(ce); + return bh; + } + unlock_buffer(bh); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; |