From: Andreas Gruenbacher This patch removes the dependency on i_sem in the getxattr and listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr semaphores go away. Instead of i_sem and the global semaphore, mutual exclusion is now ensured by per-inode xattr semaphores, and by locking the buffers before modifying them. The detailed locking strategy is described in comments in fs/ext[23]/xattr.c. Due to this change it is no longer necessary to take i_sem in ext[23]_permission() for retrieving acls, so the ext[23]_permission_locked() functions go away. Additionally, the patch fixes a race condition in ext[23]_permission: Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no longer is such protection. Instead, inode->i_acl (and inode->i_default_acl) are now accessed under inode->i_lock. (This could be replaced by RCU in the future.) In the ext3 extended attribute code, an new uglines results from locking at the buffer head level: The buffer lock must be held between testing if an xattr block can be modified and the actual modification to prevent races from happening. Before a block can be modified, ext3_journal_get_write_access() must be called. But this requies an unlocked buffer, so I call ext3_journal_get_write_access() before locking the buffer. If it turns out that the buffer cannot be modified, journal_release_buffer() is called. Calling ext3_journal_get_write_access after the test but while the buffer is still locked would be much better. fs/ext2/acl.c | 104 ++++++++++------------- fs/ext2/acl.h | 1 fs/ext2/ext2.h | 10 ++ fs/ext2/super.c | 3 fs/ext2/xattr.c | 155 ++++++++++++++++++----------------- fs/ext2/xattr_user.c | 12 -- fs/ext3/acl.c | 99 ++++++++++------------ fs/ext3/acl.h | 1 fs/ext3/super.c | 3 fs/ext3/xattr.c | 201 +++++++++++++++++++++++++--------------------- fs/ext3/xattr_user.c | 12 -- include/linux/ext3_fs_i.h | 10 ++ 12 files changed, 311 insertions(+), 300 deletions(-) diff -puN fs/ext2/acl.c~xattr-fine-grained-locking fs/ext2/acl.c --- 25/fs/ext2/acl.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/acl.c 2003-07-04 22:31:39.000000000 -0700 @@ -124,14 +124,38 @@ fail: return ERR_PTR(-EINVAL); } +static inline struct posix_acl * +ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT2_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + /* - * inode->i_sem: down + * inode->i_sem: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) { const size_t max_size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES); - struct ext2_inode_inode *ei = EXT2_I(inode); + struct ext2_inode_info *ei = EXT2_I(inode); int name_index; char *value; struct posix_acl *acl; @@ -142,14 +166,16 @@ ext2_get_acl(struct inode *inode, int ty switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT2_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_acl); + acl = ext2_iget_acl(inode, &ei->i_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_default_acl); + acl = ext2_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; break; @@ -171,11 +197,11 @@ ext2_get_acl(struct inode *inode, int ty if (!IS_ERR(acl)) { switch(type) { case ACL_TYPE_ACCESS: - ei->i_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ei->i_default_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -240,23 +266,24 @@ ext2_set_acl(struct inode *inode, int ty if (!error) { switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(ei->i_acl); - ei->i_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = posix_acl_dup(acl); + ext2_iset_acl(inode, &ei->i_default_acl, acl); break; } } return error; } -static int -__ext2_permission(struct inode *inode, int mask, int lock) +/* + * Inode operation permission(). + * + * inode->i_sem: don't care + */ +int +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; @@ -270,30 +297,16 @@ __ext2_permission(struct inode *inode, i if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - struct ext2_inode_info *ei = EXT2_I(inode); + struct posix_acl *acl; /* The access ACL cannot grant access if the group class permission bits don't contain all requested permissions. */ if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (ei->i_acl == EXT2_ACL_NOT_CACHED) { - struct posix_acl *acl; - - if (lock) { - down(&inode->i_sem); - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - up(&inode->i_sem); - } else - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); - if (ei->i_acl == EXT2_ACL_NOT_CACHED) - return -EIO; - } - if (ei->i_acl) { - int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; @@ -320,32 +333,10 @@ check_capabilities: } /* - * Inode operation permission(). - * - * inode->i_sem: up - * BKL held [before 2.5.x] - */ -int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return __ext2_permission(inode, mask, 1); -} - -/* - * Used internally if i_sem is already down. - */ -int -ext2_permission_locked(struct inode *inode, int mask) -{ - return __ext2_permission(inode, mask, 0); -} - -/* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * * dir->i_sem: down * inode->i_sem: up (access to inode is still exclusive) - * BKL held [before 2.5.x] */ int ext2_init_acl(struct inode *inode, struct inode *dir) @@ -405,7 +396,6 @@ cleanup: * file mode. * * inode->i_sem: down - * BKL held [before 2.5.x] */ int ext2_acl_chmod(struct inode *inode) diff -puN fs/ext2/acl.h~xattr-fine-grained-locking fs/ext2/acl.h --- 25/fs/ext2/acl.h~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/acl.h 2003-07-04 22:31:27.000000000 -0700 @@ -60,7 +60,6 @@ static inline int ext2_acl_count(size_t /* acl.c */ extern int ext2_permission (struct inode *, int, struct nameidata *); -extern int ext2_permission_locked (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff -puN fs/ext2/ext2.h~xattr-fine-grained-locking fs/ext2/ext2.h --- 25/fs/ext2/ext2.h~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/ext2.h 2003-07-04 22:31:27.000000000 -0700 @@ -41,6 +41,16 @@ struct ext2_inode_info { __u32 i_prealloc_block; __u32 i_prealloc_count; __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT2_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif #ifdef CONFIG_EXT2_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; diff -puN fs/ext2/super.c~xattr-fine-grained-locking fs/ext2/super.c --- 25/fs/ext2/super.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/super.c 2003-07-04 22:31:27.000000000 -0700 @@ -177,6 +177,9 @@ static void init_once(void * foo, kmem_c if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { rwlock_init(&ei->i_meta_lock); +#ifdef CONFIG_EXT2_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif inode_init_once(&ei->vfs_inode); } } diff -puN fs/ext2/xattr.c~xattr-fine-grained-locking fs/ext2/xattr.c --- 25/fs/ext2/xattr.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/xattr.c 2003-07-04 22:31:27.000000000 -0700 @@ -42,13 +42,12 @@ * * Locking strategy * ---------------- - * The VFS already holds the BKL and the inode->i_sem semaphore when any of - * the xattr inode operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext2_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. */ #include @@ -57,7 +56,7 @@ #include #include #include -#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext struct ext2_xattr_entry *); static struct mb_cache *ext2_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext2_xattr_sem semaphore. However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext2_xattr_sem); static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; @@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_getxattr(struct dentry *dentry, const char *name, @@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, con { struct ext2_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext2_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext2_xattr_list(dentry->d_inode, buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext2_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_setxattr(struct dentry *dentry, const char *name, @@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, con /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext2_removexattr(struct dentry *dentry, const char *name) @@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int if (name == NULL) return -EINVAL; + down_read(&EXT2_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT2_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -365,6 +347,7 @@ found: cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, cha ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT2_I(inode)->xattr_sem); + error = 0; if (!EXT2_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2 cleanup: brelse(bh); + up_read(&EXT2_I(inode)->xattr_sem); return error; } @@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext2_xattr_sem); - + down_write(&EXT2_I(inode)->xattr_sem); if (EXT2_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); @@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_s /* Here we know that we can set the new attribute. */ if (header) { + /* assert(header == HDR(bh)); */ + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext2_xattr_cache_remove(bh); + /* keep the buffer locked while modifying it. */ } else { int offset; + unlock_buffer(bh); ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_s last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT2_XATTR_LEN(name_len); @@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_s skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, NULL); } else { ext2_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext2_xattr_set2(inode, bh, header); } @@ -724,7 +720,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); return error; } @@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, str new_bh = ext2_xattr_cache_find(inode, header); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. No need to lock the block as we + don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext2_xattr_cache_insert(new_bh); @@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, str error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. - */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - - if (refcount == 1) { + * If there was an old block and we are no longer using it, + * release the old block. + */ + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); @@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, str bforget(old_bh); } else { /* Decrement the refcount only. */ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); mark_buffer_dirty(old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -850,12 +851,11 @@ cleanup: void ext2_xattr_delete_inode(struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) - return; - down(&ext2_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", @@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *in EXT2_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext2_xattr_cache_remove(bh); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); @@ -885,11 +885,13 @@ ext2_xattr_delete_inode(struct inode *in sync_dirty_buffer(bh); DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT2_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext2_xattr_sem); + up_write(&EXT2_I(inode)->xattr_sem); } /* @@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header * * Find an identical extended attribute block. * - * Returns a pointer to the block found, or NULL if such a block was - * not found or an error occurred. + * Returns a locked buffer head to the block found, or NULL if such + * a block was not found or an error occurred. */ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) @@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inod ext2_error(inode->i_sb, "ext2_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT2_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT2_XATTR_REFCOUNT_MAX); - } else if (!ext2_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT2_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT2_XATTR_REFCOUNT_MAX); + } else if (!ext2_xattr_cmp(header, HDR(bh))) { + ea_bdebug(bh, "b_count=%d", + atomic_read(&(bh->b_count))); + mb_cache_entry_release(ce); + return bh; + } + unlock_buffer(bh); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; diff -puN fs/ext2/xattr_user.c~xattr-fine-grained-locking fs/ext2/xattr_user.c --- 25/fs/ext2/xattr_user.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext2/xattr_user.c 2003-07-04 22:31:27.000000000 -0700 @@ -11,10 +11,6 @@ #include "ext2.h" #include "xattr.h" -#ifdef CONFIG_EXT2_FS_POSIX_ACL -# include "acl.h" -#endif - #define XATTR_USER_PREFIX "user." static size_t @@ -44,11 +40,7 @@ ext2_xattr_user_get(struct inode *inode, return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - error = ext2_permission_locked(inode, MAY_READ); -#else error = permission(inode, MAY_READ, NULL); -#endif if (error) return error; @@ -68,11 +60,7 @@ ext2_xattr_user_set(struct inode *inode, if ( !S_ISREG(inode->i_mode) && (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) return -EPERM; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - error = ext2_permission_locked(inode, MAY_WRITE); -#else error = permission(inode, MAY_WRITE, NULL); -#endif if (error) return error; diff -puN fs/ext3/acl.c~xattr-fine-grained-locking fs/ext3/acl.c --- 25/fs/ext3/acl.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext3/acl.c 2003-07-04 22:31:27.000000000 -0700 @@ -125,10 +125,34 @@ fail: return ERR_PTR(-EINVAL); } +static inline struct posix_acl * +ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + /* * Inode operation get_posix_acl(). * - * inode->i_sem: down + * inode->i_sem: don't care */ static struct posix_acl * ext3_get_acl(struct inode *inode, int type) @@ -145,14 +169,16 @@ ext3_get_acl(struct inode *inode, int ty switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT3_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_acl); + acl = ext3_iget_acl(inode, &ei->i_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) - return posix_acl_dup(ei->i_default_acl); + acl = ext3_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; break; @@ -174,11 +200,11 @@ ext3_get_acl(struct inode *inode, int ty if (!IS_ERR(acl)) { switch(type) { case ACL_TYPE_ACCESS: - ei->i_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ei->i_default_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -245,23 +271,24 @@ ext3_set_acl(handle_t *handle, struct in if (!error) { switch(type) { case ACL_TYPE_ACCESS: - if (ei->i_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(ei->i_acl); - ei->i_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = posix_acl_dup(acl); + ext3_iset_acl(inode, &ei->i_default_acl, acl); break; } } return error; } -static int -__ext3_permission(struct inode *inode, int mask, int lock) +/* + * Inode operation permission(). + * + * inode->i_sem: don't care + */ +int +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; @@ -275,30 +302,16 @@ __ext3_permission(struct inode *inode, i if (current->fsuid == inode->i_uid) { mode >>= 6; } else if (test_opt(inode->i_sb, POSIX_ACL)) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct posix_acl *acl; /* The access ACL cannot grant access if the group class permission bits don't contain all requested permissions. */ if (((mode >> 3) & mask & S_IRWXO) != mask) goto check_groups; - if (ei->i_acl == EXT3_ACL_NOT_CACHED) { - struct posix_acl *acl; - - if (lock) { - down(&inode->i_sem); - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); - up(&inode->i_sem); - } else - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); - if (ei->i_acl == EXT3_ACL_NOT_CACHED) - return -EIO; - } - if (ei->i_acl) { - int error = posix_acl_permission(inode, ei->i_acl,mask); if (error == -EACCES) goto check_capabilities; return error; @@ -325,26 +338,6 @@ check_capabilities: } /* - * Inode operation permission(). - * - * inode->i_sem: up - */ -int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return __ext3_permission(inode, mask, 1); -} - -/* - * Used internally if i_sem is already down. - */ -int -ext3_permission_locked(struct inode *inode, int mask) -{ - return __ext3_permission(inode, mask, 0); -} - -/* * Initialize the ACLs of a new inode. Called from ext3_new_inode. * * dir->i_sem: down diff -puN fs/ext3/acl.h~xattr-fine-grained-locking fs/ext3/acl.h --- 25/fs/ext3/acl.h~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext3/acl.h 2003-07-04 22:31:27.000000000 -0700 @@ -60,7 +60,6 @@ static inline int ext3_acl_count(size_t /* acl.c */ extern int ext3_permission (struct inode *, int, struct nameidata *); -extern int ext3_permission_locked (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff -puN fs/ext3/super.c~xattr-fine-grained-locking fs/ext3/super.c --- 25/fs/ext3/super.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext3/super.c 2003-07-04 22:31:27.000000000 -0700 @@ -519,6 +519,9 @@ static void init_once(void * foo, kmem_c if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_orphan); +#ifdef CONFIG_EXT3_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif init_rwsem(&ei->truncate_sem); inode_init_once(&ei->vfs_inode); } diff -puN fs/ext3/xattr.c~xattr-fine-grained-locking fs/ext3/xattr.c --- 25/fs/ext3/xattr.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext3/xattr.c 2003-07-04 22:31:27.000000000 -0700 @@ -43,13 +43,12 @@ * * Locking strategy * ---------------- - * The VFS holdsinode->i_sem semaphore when any of the xattr inode - * operations are called, so we are guaranteed that only one - * processes accesses extended attributes of an inode at any time. - * - * For writing we also grab the ext3_xattr_sem semaphore. This ensures that - * only a single process is modifying an extended attribute block, even - * if the block is shared among inodes. + * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count will change. Multiple writers to an EA block are synchronized + * by the bh lock. No more than a single bh lock is held at any time + * to avoid deadlocks. */ #include @@ -59,7 +58,7 @@ #include #include #include -#include +#include #include "xattr.h" #include "acl.h" @@ -93,22 +92,14 @@ static int ext3_xattr_set_handle2(handle struct ext3_xattr_header *); static int ext3_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext3_xattr_cache_find(struct inode *, - struct ext3_xattr_header *); +static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *, + struct ext3_xattr_header *, + int *); static void ext3_xattr_cache_remove(struct buffer_head *); static void ext3_xattr_rehash(struct ext3_xattr_header *, struct ext3_xattr_entry *); static struct mb_cache *ext3_xattr_cache; - -/* - * If a file system does not share extended attributes among inodes, - * we should not need the ext3_xattr_sem semaphore. However, the - * filesystem may still contain shared blocks, so we always take - * the lock. - */ - -static DECLARE_MUTEX(ext3_xattr_sem); static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; @@ -191,7 +182,7 @@ ext3_xattr_handler(int name_index) /* * Inode operation getxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_getxattr(struct dentry *dentry, const char *name, @@ -199,39 +190,28 @@ ext3_getxattr(struct dentry *dentry, con { struct ext3_xattr_handler *handler; struct inode *inode = dentry->d_inode; - ssize_t error; handler = ext3_xattr_resolve_name(&name); if (!handler) return -EOPNOTSUPP; - down(&inode->i_sem); - error = handler->get(inode, name, buffer, size); - up(&inode->i_sem); - - return error; + return handler->get(inode, name, buffer, size); } /* * Inode operation listxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: don't care */ ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) { - ssize_t error; - - down(&dentry->d_inode->i_sem); - error = ext3_xattr_list(dentry->d_inode, buffer, size); - up(&dentry->d_inode->i_sem); - - return error; + return ext3_xattr_list(dentry->d_inode, buffer, size); } /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_setxattr(struct dentry *dentry, const char *name, @@ -251,7 +231,7 @@ ext3_setxattr(struct dentry *dentry, con /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_sem: down */ int ext3_removexattr(struct dentry *dentry, const char *name) @@ -290,12 +270,15 @@ ext3_xattr_get(struct inode *inode, int if (name == NULL) return -EINVAL; + down_read(&EXT3_I(inode)->xattr_sem); + error = -ENODATA; if (!EXT3_I(inode)->i_file_acl) - return -ENODATA; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -360,6 +343,7 @@ found: cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -386,12 +370,15 @@ ext3_xattr_list(struct inode *inode, cha ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); + down_read(&EXT3_I(inode)->xattr_sem); + error = 0; if (!EXT3_I(inode)->i_file_acl) - return 0; + goto cleanup; ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; if (!bh) - return -EIO; + goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; @@ -444,6 +431,7 @@ bad_block: ext3_error(inode->i_sb, "ext3 cleanup: brelse(bh); + up_read(&EXT3_I(inode)->xattr_sem); return error; } @@ -459,11 +447,12 @@ static void ext3_xattr_update_super_bloc return; lock_super(sb); - ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - EXT3_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { + EXT3_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + } unlock_super(sb); } @@ -518,8 +507,7 @@ ext3_xattr_set_handle(handle_t *handle, name_len = strlen(name); if (name_len > 255 || value_len > sb->s_blocksize) return -ERANGE; - down(&ext3_xattr_sem); - + down_write(&EXT3_I(inode)->xattr_sem); if (EXT3_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. */ bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); @@ -612,15 +600,28 @@ bad_block: ext3_error(sb, "ext3_xattr_s /* Here we know that we can set the new attribute. */ if (header) { + int credits = 0; + + /* assert(header == HDR(bh)); */ + if (header->h_refcount != cpu_to_le32(1)) + goto skip_get_write_access; + /* ext3_journal_get_write_access() requires an unlocked bh, + which complicates things here. */ + error = ext3_journal_get_write_access_credits(handle, bh, + &credits); + if (error) + goto cleanup; + lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "modifying in-place"); ext3_xattr_cache_remove(bh); - error = ext3_journal_get_write_access(handle, bh); - if (error) - goto cleanup; + /* keep the buffer locked while modifying it. */ } else { int offset; + unlock_buffer(bh); + journal_release_buffer(handle, bh, credits); + skip_get_write_access: ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); error = -ENOMEM; @@ -645,6 +646,8 @@ bad_block: ext3_error(sb, "ext3_xattr_s last = here = ENTRY(header+1); } + /* Iff we are modifying the block in-place, bh is locked here. */ + if (not_found) { /* Insert the new name. */ size_t size = EXT3_XATTR_LEN(name_len); @@ -715,9 +718,13 @@ bad_block: ext3_error(sb, "ext3_xattr_s skip_replace: if (IS_LAST_ENTRY(ENTRY(header+1))) { /* This block is now empty. */ + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, NULL); } else { ext3_xattr_rehash(header, here); + if (bh && header == HDR(bh)) + unlock_buffer(bh); /* we were modifying in-place. */ error = ext3_xattr_set_handle2(handle, inode, bh, header); } @@ -725,7 +732,7 @@ cleanup: brelse(bh); if (!(bh && header == HDR(bh))) kfree(header); - up(&ext3_xattr_sem); + up_write(&EXT3_I(inode)->xattr_sem); return error; } @@ -740,33 +747,34 @@ ext3_xattr_set_handle2(handle_t *handle, { struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; - int error; + int credits = 0, error; if (header) { - new_bh = ext3_xattr_cache_find(inode, header); + new_bh = ext3_xattr_cache_find(handle, inode, header, &credits); if (new_bh) { /* - * We found an identical block in the cache. - * The old block will be released after updating - * the inode. + * We found an identical block in the cache. The + * block returned is locked. The old block will + * be released after updating the inode. */ ea_bdebug(new_bh, "%s block %lu", (old_bh == new_bh) ? "keeping" : "reusing", (unsigned long) new_bh->b_blocknr); error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) - goto cleanup; - - error = ext3_journal_get_write_access(handle, new_bh); - if (error) + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + unlock_buffer(new_bh); + journal_release_buffer(handle, new_bh, credits); goto cleanup; + } HDR(new_bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); } else if (old_bh && header == HDR(old_bh)) { - /* Keep this block. */ + /* Keep this block. No need to lock the block as we + * don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); ext3_xattr_cache_insert(new_bh); @@ -817,15 +825,14 @@ getblk_failed: error = 0; if (old_bh && old_bh != new_bh) { /* - * If there was an old block, and we are not still using it, - * we now release the old block. + * If there was an old block, and we are no longer using it, + * release the old block. */ - unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); - error = ext3_journal_get_write_access(handle, old_bh); if (error) goto cleanup; - if (refcount == 1) { + lock_buffer(old_bh); + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); @@ -837,12 +844,14 @@ getblk_failed: ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); } else { /* Decrement the refcount only. */ - refcount--; - HDR(old_bh)->h_refcount = cpu_to_le32(refcount); + HDR(old_bh)->h_refcount = cpu_to_le32( + le32_to_cpu(HDR(old_bh)->h_refcount) - 1); DQUOT_FREE_BLOCK(inode, 1); ext3_journal_dirty_metadata(handle, old_bh); - ea_bdebug(old_bh, "refcount now=%d", refcount); + ea_bdebug(old_bh, "refcount now=%d", + le32_to_cpu(HDR(old_bh)->h_refcount)); } + unlock_buffer(old_bh); } cleanup: @@ -886,12 +895,11 @@ ext3_xattr_set(struct inode *inode, int void ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; + down_write(&EXT3_I(inode)->xattr_sem); if (!EXT3_I(inode)->i_file_acl) - return; - down(&ext3_xattr_sem); - + goto cleanup; bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); if (!bh) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -899,7 +907,6 @@ ext3_xattr_delete_inode(handle_t *handle EXT3_I(inode)->i_file_acl); goto cleanup; } - ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || HDR(bh)->h_blocks != cpu_to_le32(1)) { ext3_error(inode->i_sb, "ext3_xattr_delete_inode", @@ -907,8 +914,9 @@ ext3_xattr_delete_inode(handle_t *handle EXT3_I(inode)->i_file_acl); goto cleanup; } - ext3_journal_get_write_access(handle, bh); - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + if (ext3_journal_get_write_access(handle, bh) != 0) + goto cleanup; + lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ext3_xattr_cache_remove(bh); ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); @@ -922,11 +930,13 @@ ext3_xattr_delete_inode(handle_t *handle handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); } + ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); + unlock_buffer(bh); EXT3_I(inode)->i_file_acl = 0; cleanup: brelse(bh); - up(&ext3_xattr_sem); + up_write(&EXT3_I(inode)->xattr_sem); } /* @@ -1022,7 +1032,8 @@ ext3_xattr_cmp(struct ext3_xattr_header * not found or an error occurred. */ static struct buffer_head * -ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) +ext3_xattr_cache_find(handle_t *handle, struct inode *inode, + struct ext3_xattr_header *header, int *credits) { __u32 hash = le32_to_cpu(header->h_hash); struct mb_cache_entry *ce; @@ -1039,18 +1050,28 @@ ext3_xattr_cache_find(struct inode *inod ext3_error(inode->i_sb, "ext3_xattr_cache_find", "inode %ld: block %ld read error", inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(HDR(bh)->h_refcount) > - EXT3_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %ld refcount %d>%d", - (unsigned long) ce->e_block, - le32_to_cpu(HDR(bh)->h_refcount), - EXT3_XATTR_REFCOUNT_MAX); - } else if (!ext3_xattr_cmp(header, HDR(bh))) { - ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); - return bh; + } else { + /* ext3_journal_get_write_access() requires an unlocked + * bh, which complicates things here. */ + if (ext3_journal_get_write_access_credits(handle, bh, + credits) != 0) + return NULL; + lock_buffer(bh); + if (le32_to_cpu(HDR(bh)->h_refcount) > + EXT3_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %ld refcount %d>%d", + (unsigned long) ce->e_block, + le32_to_cpu(HDR(bh)->h_refcount), + EXT3_XATTR_REFCOUNT_MAX); + } else if (!ext3_xattr_cmp(header, HDR(bh))) { + mb_cache_entry_release(ce); + /* buffer will be unlocked by caller */ + return bh; + } + unlock_buffer(bh); + journal_release_buffer(handle, bh, *credits); + brelse(bh); } - brelse(bh); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); } return NULL; diff -puN fs/ext3/xattr_user.c~xattr-fine-grained-locking fs/ext3/xattr_user.c --- 25/fs/ext3/xattr_user.c~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/fs/ext3/xattr_user.c 2003-07-04 22:31:27.000000000 -0700 @@ -13,10 +13,6 @@ #include #include "xattr.h" -#ifdef CONFIG_EXT3_FS_POSIX_ACL -# include "acl.h" -#endif - #define XATTR_USER_PREFIX "user." static size_t @@ -46,11 +42,7 @@ ext3_xattr_user_get(struct inode *inode, return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - error = ext3_permission_locked(inode, MAY_READ); -#else error = permission(inode, MAY_READ, NULL); -#endif if (error) return error; @@ -70,11 +62,7 @@ ext3_xattr_user_set(struct inode *inode, if ( !S_ISREG(inode->i_mode) && (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) return -EPERM; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - error = ext3_permission_locked(inode, MAY_WRITE); -#else error = permission(inode, MAY_WRITE, NULL); -#endif if (error) return error; diff -puN include/linux/ext3_fs_i.h~xattr-fine-grained-locking include/linux/ext3_fs_i.h --- 25/include/linux/ext3_fs_i.h~xattr-fine-grained-locking 2003-07-04 22:31:27.000000000 -0700 +++ 25-akpm/include/linux/ext3_fs_i.h 2003-07-04 22:31:27.000000000 -0700 @@ -62,6 +62,16 @@ struct ext3_inode_info { __u32 i_prealloc_count; #endif __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT3_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif #ifdef CONFIG_EXT3_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; _