aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext2/xattr.c
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2003-07-06 05:41:12 -0700
committerLinus Torvalds <torvalds@home.osdl.org>2003-07-06 05:41:12 -0700
commit6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be (patch)
treeda41f4772ecbdecc4e1ca9396478398c3ce159cf /fs/ext2/xattr.c
parent430cab6d8dc4b79ad691c1d50d7ef217777e2593 (diff)
downloadhistory-6abc05cce8b06b9c986fb2bbd83e6fa9888ab1be.tar.gz
[PATCH] xattr: fine-grained locking
From: Andreas Gruenbacher <agruen@suse.de> This patch removes the dependency on i_sem in the getxattr and listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr semaphores go away. Instead of i_sem and the global semaphore, mutual exclusion is now ensured by per-inode xattr semaphores, and by locking the buffers before modifying them. The detailed locking strategy is described in comments in fs/ext[23]/xattr.c. Due to this change it is no longer necessary to take i_sem in ext[23]_permission() for retrieving acls, so the ext[23]_permission_locked() functions go away. Additionally, the patch fixes a race condition in ext[23]_permission: Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no longer is such protection. Instead, inode->i_acl (and inode->i_default_acl) are now accessed under inode->i_lock. (This could be replaced by RCU in the future.) In the ext3 extended attribute code, an new uglines results from locking at the buffer head level: The buffer lock must be held between testing if an xattr block can be modified and the actual modification to prevent races from happening. Before a block can be modified, ext3_journal_get_write_access() must be called. But this requies an unlocked buffer, so I call ext3_journal_get_write_access() before locking the buffer. If it turns out that the buffer cannot be modified, journal_release_buffer() is called. Calling ext3_journal_get_write_access after the test but while the buffer is still locked would be much better.
Diffstat (limited to 'fs/ext2/xattr.c')
-rw-r--r--fs/ext2/xattr.c155
1 files changed, 81 insertions, 74 deletions
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index aa29871da68e34..f1334adc62ede5 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -42,13 +42,12 @@
*
* Locking strategy
* ----------------
- * The VFS already holds the BKL and the inode->i_sem semaphore when any of
- * the xattr inode operations are called, so we are guaranteed that only one
- * processes accesses extended attributes of an inode at any time.
- *
- * For writing we also grab the ext2_xattr_sem semaphore. This ensures that
- * only a single process is modifying an extended attribute block, even
- * if the block is shared among inodes.
+ * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
+ * EA blocks are only changed if they are exclusive to an inode, so
+ * holding xattr_sem also means that nothing but the EA block's reference
+ * count will change. Multiple writers to an EA block are synchronized
+ * by the bh lock. No more than a single bh lock is held at any time
+ * to avoid deadlocks.
*/
#include <linux/buffer_head.h>
@@ -57,7 +56,7 @@
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
-#include <asm/semaphore.h>
+#include <linux/rwsem.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
@@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
static struct mb_cache *ext2_xattr_cache;
-
-/*
- * If a file system does not share extended attributes among inodes,
- * we should not need the ext2_xattr_sem semaphore. However, the
- * filesystem may still contain shared blocks, so we always take
- * the lock.
- */
-
-static DECLARE_MUTEX(ext2_xattr_sem);
static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX];
static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED;
@@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index)
/*
* Inode operation getxattr()
*
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_sem: don't care
*/
ssize_t
ext2_getxattr(struct dentry *dentry, const char *name,
@@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, const char *name,
{
struct ext2_xattr_handler *handler;
struct inode *inode = dentry->d_inode;
- ssize_t error;
handler = ext2_xattr_resolve_name(&name);
if (!handler)
return -EOPNOTSUPP;
- down(&inode->i_sem);
- error = handler->get(inode, name, buffer, size);
- up(&inode->i_sem);
-
- return error;
+ return handler->get(inode, name, buffer, size);
}
/*
* Inode operation listxattr()
*
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_sem: don't care
*/
ssize_t
ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- ssize_t error;
-
- down(&dentry->d_inode->i_sem);
- error = ext2_xattr_list(dentry->d_inode, buffer, size);
- up(&dentry->d_inode->i_sem);
-
- return error;
+ return ext2_xattr_list(dentry->d_inode, buffer, size);
}
/*
* Inode operation setxattr()
*
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_sem: down
*/
int
ext2_setxattr(struct dentry *dentry, const char *name,
@@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, const char *name,
/*
* Inode operation removexattr()
*
- * dentry->d_inode->i_sem down
+ * dentry->d_inode->i_sem: down
*/
int
ext2_removexattr(struct dentry *dentry, const char *name)
@@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
if (name == NULL)
return -EINVAL;
+ down_read(&EXT2_I(inode)->xattr_sem);
+ error = -ENODATA;
if (!EXT2_I(inode)->i_file_acl)
- return -ENODATA;
+ goto cleanup;
ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
+ error = -EIO;
if (!bh)
- return -EIO;
+ goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
@@ -365,6 +347,7 @@ found:
cleanup:
brelse(bh);
+ up_read(&EXT2_I(inode)->xattr_sem);
return error;
}
@@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
+ down_read(&EXT2_I(inode)->xattr_sem);
+ error = 0;
if (!EXT2_I(inode)->i_file_acl)
- return 0;
+ goto cleanup;
ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
+ error = -EIO;
if (!bh)
- return -EIO;
+ goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
@@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
cleanup:
brelse(bh);
+ up_read(&EXT2_I(inode)->xattr_sem);
return error;
}
@@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
name_len = strlen(name);
if (name_len > 255 || value_len > sb->s_blocksize)
return -ERANGE;
- down(&ext2_xattr_sem);
-
+ down_write(&EXT2_I(inode)->xattr_sem);
if (EXT2_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
@@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */
if (header) {
+ /* assert(header == HDR(bh)); */
+ lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
ea_bdebug(bh, "modifying in-place");
ext2_xattr_cache_remove(bh);
+ /* keep the buffer locked while modifying it. */
} else {
int offset;
+ unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
error = -ENOMEM;
@@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
last = here = ENTRY(header+1);
}
+ /* Iff we are modifying the block in-place, bh is locked here. */
+
if (not_found) {
/* Insert the new name. */
size_t size = EXT2_XATTR_LEN(name_len);
@@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
skip_replace:
if (IS_LAST_ENTRY(ENTRY(header+1))) {
/* This block is now empty. */
+ if (bh && header == HDR(bh))
+ unlock_buffer(bh); /* we were modifying in-place. */
error = ext2_xattr_set2(inode, bh, NULL);
} else {
ext2_xattr_rehash(header, here);
+ if (bh && header == HDR(bh))
+ unlock_buffer(bh); /* we were modifying in-place. */
error = ext2_xattr_set2(inode, bh, header);
}
@@ -724,7 +720,7 @@ cleanup:
brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
- up(&ext2_xattr_sem);
+ up_write(&EXT2_I(inode)->xattr_sem);
return error;
}
@@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
new_bh = ext2_xattr_cache_find(inode, header);
if (new_bh) {
/*
- * We found an identical block in the cache.
- * The old block will be released after updating
- * the inode.
+ * We found an identical block in the cache. The
+ * block returned is locked. The old block will
+ * be released after updating the inode.
*/
ea_bdebug(new_bh, "%s block %lu",
(old_bh == new_bh) ? "keeping" : "reusing",
(unsigned long) new_bh->b_blocknr);
error = -EDQUOT;
- if (DQUOT_ALLOC_BLOCK(inode, 1))
+ if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+ unlock_buffer(new_bh);
goto cleanup;
+ }
HDR(new_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
ea_bdebug(new_bh, "refcount now=%d",
le32_to_cpu(HDR(new_bh)->h_refcount));
+ unlock_buffer(new_bh);
} else if (old_bh && header == HDR(old_bh)) {
- /* Keep this block. */
+ /* Keep this block. No need to lock the block as we
+ don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
ext2_xattr_cache_insert(new_bh);
@@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0;
if (old_bh && old_bh != new_bh) {
/*
- * If there was an old block, and we are not still using it,
- * we now release the old block.
- */
- unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
-
- if (refcount == 1) {
+ * If there was an old block and we are no longer using it,
+ * release the old block.
+ */
+ lock_buffer(old_bh);
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
@@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
bforget(old_bh);
} else {
/* Decrement the refcount only. */
- refcount--;
- HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
+ HDR(old_bh)->h_refcount = cpu_to_le32(
+ le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
DQUOT_FREE_BLOCK(inode, 1);
mark_buffer_dirty(old_bh);
- ea_bdebug(old_bh, "refcount now=%d", refcount);
+ ea_bdebug(old_bh, "refcount now=%d",
+ le32_to_cpu(HDR(old_bh)->h_refcount));
}
+ unlock_buffer(old_bh);
}
cleanup:
@@ -850,12 +851,11 @@ cleanup:
void
ext2_xattr_delete_inode(struct inode *inode)
{
- struct buffer_head *bh;
+ struct buffer_head *bh = NULL;
+ down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
- return;
- down(&ext2_xattr_sem);
-
+ goto cleanup;
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
@@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
ext2_xattr_cache_remove(bh);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
@@ -885,11 +885,13 @@ ext2_xattr_delete_inode(struct inode *inode)
sync_dirty_buffer(bh);
DQUOT_FREE_BLOCK(inode, 1);
}
+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ unlock_buffer(bh);
EXT2_I(inode)->i_file_acl = 0;
cleanup:
brelse(bh);
- up(&ext2_xattr_sem);
+ up_write(&EXT2_I(inode)->xattr_sem);
}
/*
@@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header *header1,
*
* Find an identical extended attribute block.
*
- * Returns a pointer to the block found, or NULL if such a block was
- * not found or an error occurred.
+ * Returns a locked buffer head to the block found, or NULL if such
+ * a block was not found or an error occurred.
*/
static struct buffer_head *
ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
@@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
"inode %ld: block %ld read error",
inode->i_ino, (unsigned long) ce->e_block);
- } else if (le32_to_cpu(HDR(bh)->h_refcount) >
- EXT2_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %ld refcount %d>%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(HDR(bh)->h_refcount),
- EXT2_XATTR_REFCOUNT_MAX);
- } else if (!ext2_xattr_cmp(header, HDR(bh))) {
- ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
- mb_cache_entry_release(ce);
- return bh;
+ } else {
+ lock_buffer(bh);
+ if (le32_to_cpu(HDR(bh)->h_refcount) >
+ EXT2_XATTR_REFCOUNT_MAX) {
+ ea_idebug(inode, "block %ld refcount %d>%d",
+ (unsigned long) ce->e_block,
+ le32_to_cpu(HDR(bh)->h_refcount),
+ EXT2_XATTR_REFCOUNT_MAX);
+ } else if (!ext2_xattr_cmp(header, HDR(bh))) {
+ ea_bdebug(bh, "b_count=%d",
+ atomic_read(&(bh->b_count)));
+ mb_cache_entry_release(ce);
+ return bh;
+ }
+ unlock_buffer(bh);
+ brelse(bh);
}
- brelse(bh);
ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
}
return NULL;