From: Jan Kara Journalled quota support for ext3: The patch consists of two parts - ext3 changes and changes in the generic quota code. The main idea of the changes is that a transaction is always started before any operation which changes the quota file, and that dirtying a quota structure causes it to be written to disk. These two changes ensure that a quota change is journalled into the same transaction as the file change, and hence after journal replay the quota is consistent with the filesystem state. Because inodes on the orphan list are deleted/truncated during journal replay, we have to do quota_on before the orphan list is replayed - this problem is solved by additional ext3 mount options giving the quota file names and format. Some changes in the generic code were also needed to ensure that the quota structure in the file is always allocated, so that ordinary quota operations (like adding/deleting a block/inode) need only a few blocks from the transaction. --- 25-akpm/fs/Kconfig | 11 25-akpm/fs/dquot.c | 419 ++++++++++++++++++++++++++----------- 25-akpm/fs/ext3/inode.c | 25 ++ 25-akpm/fs/ext3/namei.c | 26 +- 25-akpm/fs/ext3/super.c | 357 ++++++++++++++++++++++++++----- 25-akpm/fs/quota_v1.c | 7 25-akpm/fs/quota_v2.c | 72 +++--- 25-akpm/fs/stat.c | 2 25-akpm/include/linux/ext3_fs_sb.h | 4 25-akpm/include/linux/ext3_jbd.h | 18 + 25-akpm/include/linux/quota.h | 53 ++-- 25-akpm/include/linux/quotaops.h | 35 ++- 12 files changed, 768 insertions(+), 261 deletions(-) diff -puN fs/dquot.c~ext3-journalled-quotas fs/dquot.c --- 25/fs/dquot.c~ext3-journalled-quotas 2004-04-14 18:37:51.795140200 -0700 +++ 25-akpm/fs/dquot.c 2004-04-14 18:37:51.822136096 -0700 @@ -1,16 +1,13 @@ /* - * Implementation of the diskquota system for the LINUX operating - * system. QUOTA is implemented using the BSD system call interface as - * the means of communication with the user level. Currently only the - * ext2 filesystem has support for disk quotas. Other filesystems may - * be added in the future. 
This file contains the generic routines - * called by the different filesystems on allocation of an inode or - * block. These routines take care of the administration needed to - * have a consistent diskquota tracking system. The ideas of both - * user and group quotas are based on the Melbourne quota system as - * used on BSD derived systems. The internal implementation is - * based on one of the several variants of the LINUX inode-subsystem - * with added complexity of the diskquota system. + * Implementation of the diskquota system for the LINUX operating system. QUOTA + * is implemented using the BSD system call interface as the means of + * communication with the user level. This file contains the generic routines + * called by the different filesystems on allocation of an inode or block. + * These routines take care of the administration needed to have a consistent + * diskquota tracking system. The ideas of both user and group quotas are based + * on the Melbourne quota system as used on BSD derived systems. The internal + * implementation is based on one of the several variants of the LINUX + * inode-subsystem with added complexity of the diskquota system. * * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $ * @@ -52,6 +49,9 @@ * New SMP locking. * Jan Kara, , 10/2002 * + * Added journalled quota support + * Jan Kara, , 2003,2004 + * * (C) Copyright 1994 - 1997 Marco van Wieringen */ @@ -104,13 +104,17 @@ * * Each dquot has its dq_lock semaphore. Locked dquots might not be referenced * from inodes (dquot_alloc_space() and such don't check the dq_lock). - * Currently dquot is locked only when it is being read to memory on the first - * dqget(). Write operations on dquots don't hold dq_lock as they copy data - * under dq_data_lock spinlock to internal buffers before writing. 
+ * Currently dquot is locked only when it is being read to memory (or space for + * it is being allocated) on the first dqget() and when it is being released on + * the last dqput(). The allocation and release operations are serialized by + * the dq_lock and by checking the use count in dquot_release(). Write + * operations on dquots don't hold dq_lock as they copy data under dq_data_lock + * spinlock to internal buffers before writing. * * Lock ordering (including journal_lock) is following: * dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem */ + spinlock_t dq_list_lock = SPIN_LOCK_UNLOCKED; spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED; @@ -256,6 +260,9 @@ static inline void remove_inuse(struct d dqstats.allocated_dquots--; list_del(&dquot->dq_inuse); } +/* + * End of list functions needing dq_list_lock + */ static void wait_on_dquot(struct dquot *dquot) { @@ -263,34 +270,98 @@ static void wait_on_dquot(struct dquot * up(&dquot->dq_lock); } -static int read_dqblk(struct dquot *dquot) +#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) + +/* No locks needed here as ANY_DQUOT_DIRTY is used just by sync and so the + * worst that can happen is that dquot is not written by concurrent sync... 
*/ +int dquot_mark_dquot_dirty(struct dquot *dquot) +{ + set_bit(DQ_MOD_B, &(dquot)->dq_flags); + set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)-> + info[(dquot)->dq_type].dqi_flags)); + return 0; +} + +void mark_info_dirty(struct super_block *sb, int type) { - int ret; + set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags); +} +EXPORT_SYMBOL(mark_info_dirty); + +/* + * Read dquot from disk and alloc space for it + */ + +int dquot_acquire(struct dquot *dquot) +{ + int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); down(&dquot->dq_lock); down(&dqopt->dqio_sem); - ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + if (!test_bit(DQ_READ_B, &dquot->dq_flags)) + ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + if (ret < 0) + goto out_iolock; + set_bit(DQ_READ_B, &dquot->dq_flags); + /* Instantiate dquot if needed */ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { + ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + if (ret < 0) + goto out_iolock; + } + set_bit(DQ_ACTIVE_B, &dquot->dq_flags); +out_iolock: up(&dqopt->dqio_sem); up(&dquot->dq_lock); return ret; } -static int commit_dqblk(struct dquot *dquot) +/* + * Write dquot to disk + */ +int dquot_commit(struct dquot *dquot) { - int ret; + int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); down(&dqopt->dqio_sem); - ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + clear_bit(DQ_MOD_B, &dquot->dq_flags); + /* Inactive dquot can be only if there was error during read/init + * => we have better not writing it */ + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); up(&dqopt->dqio_sem); + if (info_dirty(&dqopt->info[dquot->dq_type])) + dquot->dq_sb->dq_op->write_info(dquot->dq_sb, dquot->dq_type); + return ret; +} + +/* + * Release dquot + */ +int dquot_release(struct dquot *dquot) +{ + int ret = 0; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + down(&dquot->dq_lock); + /* 
Check whether we are not racing with some other dqget() */ + if (atomic_read(&dquot->dq_count) > 1) + goto out_dqlock; + down(&dqopt->dqio_sem); + ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); + up(&dqopt->dqio_sem); +out_dqlock: + up(&dquot->dq_lock); return ret; } /* Invalidate all dquots on the list. Note that this function is called after - * quota is disabled so no new quota might be created. Because we hold - * dqonoff_sem and pointers were already removed from inodes we actually know - * that no quota for this sb+type should be held. */ + * quota is disabled and pointers from inodes removed so there cannot be new + * quota users. Also because we hold dqonoff_sem there can be no quota users + * for this sb+type at all. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot; @@ -317,7 +388,7 @@ static void invalidate_dquots(struct sup spin_unlock(&dq_list_lock); } -static int vfs_quota_sync(struct super_block *sb, int type) +int vfs_quota_sync(struct super_block *sb, int type) { struct list_head *head; struct dquot *dquot; @@ -328,9 +399,11 @@ static int vfs_quota_sync(struct super_b restart: /* At this point any dirty dquot will definitely be written so we can clear dirty flag from info */ + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)) clear_bit(DQF_ANY_DQUOT_DIRTY_B, &dqopt->info[cnt].dqi_flags); + spin_unlock(&dq_data_lock); spin_lock(&dq_list_lock); list_for_each(head, &inuse_list) { dquot = list_entry(head, struct dquot, dq_inuse); @@ -338,10 +411,13 @@ restart: continue; if (type != -1 && dquot->dq_type != type) continue; - if (!dquot->dq_sb) /* Invalidated? */ - continue; if (!dquot_dirty(dquot)) continue; + /* Dirty and inactive can be only bad dquot... 
*/ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + continue; + /* Now we have active dquot from which someone is holding reference so we + * can safely just increase use count */ atomic_inc(&dquot->dq_count); dqstats.lookups++; spin_unlock(&dq_list_lock); @@ -352,11 +428,9 @@ restart: spin_unlock(&dq_list_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) { - down(&dqopt->dqio_sem); - dqopt->ops[cnt]->write_file_info(sb, cnt); - up(&dqopt->dqio_sem); - } + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_dirty(&dqopt->info[cnt])) + sb->dq_op->write_info(sb, cnt); spin_lock(&dq_list_lock); dqstats.syncs++; spin_unlock(&dq_list_lock); @@ -431,11 +505,20 @@ we_slept: spin_unlock(&dq_list_lock); return; } - if (dquot_dirty(dquot)) { + /* Need to release dquot? */ + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); + /* Commit dquot before releasing */ dquot->dq_sb->dq_op->write_dquot(dquot); goto we_slept; } + /* Clear flag in case dquot was inactive (something bad happened) */ + clear_bit(DQ_MOD_B, &dquot->dq_flags); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + dquot_release(dquot); + goto we_slept; + } atomic_dec(&dquot->dq_count); #ifdef __DQUOT_PARANOIA /* sanity check */ @@ -494,7 +577,6 @@ we_slept: insert_dquot_hash(dquot); dqstats.lookups++; spin_unlock(&dq_list_lock); - read_dqblk(dquot); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); @@ -502,11 +584,17 @@ we_slept: dqstats.cache_hits++; dqstats.lookups++; spin_unlock(&dq_list_lock); - wait_on_dquot(dquot); if (empty) kmem_cache_free(dquot_cachep, empty); } - + /* Wait for dq_lock - after this we know that either dquot_release() is already + * finished or it will be canceled due to dq_count > 1 test */ + wait_on_dquot(dquot); + /* Read the dquot and instantiate it (everything done 
only if needed) */ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_acquire(dquot) < 0) { + dqput(dquot); + return NODQUOT; + } #ifdef __DQUOT_PARANOIA if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ BUG(); @@ -540,12 +628,10 @@ restart: struct file *filp = list_entry(p, struct file, f_list); struct inode *inode = filp->f_dentry->d_inode; if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) { - struct vfsmount *mnt = mntget(filp->f_vfsmnt); struct dentry *dentry = dget(filp->f_dentry); file_list_unlock(); sb->dq_op->initialize(inode, type); dput(dentry); - mntput(mnt); /* As we may have blocked we had better restart... */ goto restart; } @@ -627,13 +713,11 @@ static void drop_dquot_ref(struct super_ static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) { dquot->dq_dqb.dqb_curinodes += number; - mark_dquot_dirty(dquot); } static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) { dquot->dq_dqb.dqb_curspace += number; - mark_dquot_dirty(dquot); } static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number) @@ -645,7 +729,6 @@ static inline void dquot_decr_inodes(str if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time_t) 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); } static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) @@ -657,7 +740,6 @@ static inline void dquot_decr_space(stru if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); } static inline int need_print_warning(struct dquot *dquot) @@ -810,25 +892,22 @@ static int check_bdq(struct dquot *dquot } /* - * Externally referenced functions through dquot_operations in inode. - * - * Note: this is a blocking operation. 
+ * Initialize quota pointers in inode + * Transaction must be started at entry */ -void dquot_initialize(struct inode *inode, int type) +int dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; - int cnt; + int cnt, ret = 0; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) - return; + return 0; down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ - if (IS_NOQUOTA(inode)) { - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - return; - } - /* Build list of quotas to initialize... */ + if (IS_NOQUOTA(inode)) + goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -846,14 +925,16 @@ void dquot_initialize(struct inode *inod inode->i_flags |= S_QUOTA; } } +out_err: up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return ret; } /* * Release all quotas referenced by inode * Transaction must be started at an entry */ -void dquot_drop(struct inode *inode) +int dquot_drop(struct inode *inode) { int cnt; @@ -866,9 +947,19 @@ void dquot_drop(struct inode *inode) } } up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return 0; } /* + * Following four functions update i_blocks+i_bytes fields and + * quota information (together with appropriate checks) + * NOTE: We absolutely rely on the fact that caller dirties + * the inode (usually macros in quotaops.h care about this) and + * holds a handle for the current transaction so that dquot write and + * inode write go into the same transaction. 
+ */ + +/* * This operation can block, but only after everything is updated */ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) @@ -876,8 +967,10 @@ int dquot_alloc_space(struct inode *inod int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) { +out_add: inode_add_bytes(inode, number); return QUOTA_OK; } @@ -885,10 +978,11 @@ int dquot_alloc_space(struct inode *inod warntype[cnt] = NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + if (IS_NOQUOTA(inode)) { /* Now we can do reliable test... */ + up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + goto out_add; + } spin_lock(&dq_data_lock); - /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) - goto add_bytes; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; @@ -900,11 +994,15 @@ int dquot_alloc_space(struct inode *inod continue; dquot_incr_space(inode->i_dquot[cnt], number); } -add_bytes: inode_add_bytes(inode, number); ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); + if (ret == QUOTA_OK) + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; @@ -918,13 +1016,13 @@ int dquot_alloc_inode(const struct inode int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return QUOTA_OK; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; @@ -945,6 +1043,11 @@ int dquot_alloc_inode(const struct inode ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); + if (ret == QUOTA_OK) + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings((struct dquot **)inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; @@ -953,46 +1056,55 @@ warn_put_all: /* * This is a non-blocking operation. */ -void dquot_free_space(struct inode *inode, qsize_t number) +int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) { +out_sub: inode_sub_bytes(inode, number); - return; + return QUOTA_OK; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) - goto sub_bytes; + if (IS_NOQUOTA(inode)) { + up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + goto out_sub; + } + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; dquot_decr_space(inode->i_dquot[cnt], number); } -sub_bytes: inode_sub_bytes(inode, number); spin_unlock(&dq_data_lock); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + return QUOTA_OK; } /* * This is a non-blocking operation. */ -void dquot_free_inode(const struct inode *inode, unsigned long number) +int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) - return; + return QUOTA_OK; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return; + return QUOTA_OK; } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1001,7 +1113,12 @@ void dquot_free_inode(const struct inode dquot_decr_inodes(inode->i_dquot[cnt], number); } spin_unlock(&dq_data_lock); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + return QUOTA_OK; } /* @@ -1018,7 +1135,8 @@ int dquot_transfer(struct inode *inode, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return QUOTA_OK; /* Clear the arrays */ @@ -1026,15 +1144,15 @@ int dquot_transfer(struct inode *inode, transfer_to[cnt] = transfer_from[cnt] = NODQUOT; warntype[cnt] = NOWARN; } - down(&sb_dqopt(inode->i_sb)->dqonoff_sem); down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - up(&sb_dqopt(inode->i_sb)->dqonoff_sem); return QUOTA_OK; } - /* First build the transfer_to list - here we can block on reading of dquots... */ + /* First build the transfer_to list - here we can block on + * reading/instantiating of dquots. 
We know that the transaction for + * us was already started so we don't violate lock ranking here */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { switch (cnt) { case USRQUOTA: @@ -1082,7 +1200,13 @@ int dquot_transfer(struct inode *inode, ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (transfer_from[cnt]) + mark_dquot_dirty(transfer_from[cnt]); + if (transfer_to[cnt]) + mark_dquot_dirty(transfer_to[cnt]); + } flush_warnings(transfer_to, warntype); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1091,7 +1215,21 @@ warn_put_all: if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT) dqput(transfer_to[cnt]); } - up(&sb_dqopt(inode->i_sb)->dqonoff_sem); + up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return ret; +} + +/* + * Write info of quota file to disk + */ +int dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + struct quota_info *dqopt = sb_dqopt(sb); + + down(&dqopt->dqio_sem); + ret = dqopt->ops[type]->write_file_info(sb, type); + up(&dqopt->dqio_sem); return ret; } @@ -1099,22 +1237,18 @@ warn_put_all: * Definitions of diskquota operations. 
*/ struct dquot_operations dquot_operations = { - .initialize = dquot_initialize, /* mandatory */ - .drop = dquot_drop, /* mandatory */ + .initialize = dquot_initialize, + .drop = dquot_drop, .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, - .write_dquot = commit_dqblk + .write_dquot = dquot_commit, + .mark_dirty = dquot_mark_dquot_dirty, + .write_info = dquot_commit_info }; -/* Function used by filesystems for initializing the dquot_operations structure */ -void init_dquot_operations(struct dquot_operations *fsdqops) -{ - memcpy(fsdqops, &dquot_operations, sizeof(dquot_operations)); -} - static inline void set_enable_flags(struct quota_info *dqopt, int type) { switch (type) { @@ -1166,17 +1300,14 @@ int vfs_quota_off(struct super_block *sb * Now all dquots should be invalidated, all writes done so we should be only * users of the info. No locks needed. */ - if (info_dirty(&dqopt->info[cnt])) { - down(&dqopt->dqio_sem); - dqopt->ops[cnt]->write_file_info(sb, cnt); - up(&dqopt->dqio_sem); - } + if (info_dirty(&dqopt->info[cnt])) + sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); fput(dqopt->files[cnt]); - dqopt->files[cnt] = (struct file *)NULL; + dqopt->files[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; @@ -1187,33 +1318,30 @@ out: return 0; } -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +/* + * Turn quotas on on a device + */ + +/* Helper function when we already have file open */ +static int vfs_quota_on_file(struct file *f, int type, int format_id) { - struct file *f; + struct quota_format_type *fmt = find_quota_format(format_id); struct inode *inode; + struct super_block *sb = f->f_dentry->d_sb; struct quota_info *dqopt = sb_dqopt(sb); - struct 
quota_format_type *fmt = find_quota_format(format_id); - int error, cnt; struct dquot *to_drop[MAXQUOTAS]; + int error, cnt; unsigned int oldflags; if (!fmt) return -ESRCH; - f = filp_open(path, O_RDWR, 0600); - if (IS_ERR(f)) { - error = PTR_ERR(f); - goto out_fmt; - } error = -EIO; if (!f->f_op || !f->f_op->read || !f->f_op->write) - goto out_f; - error = security_quota_on(f); - if (error) - goto out_f; + goto out_fmt; inode = f->f_dentry->d_inode; error = -EACCES; if (!S_ISREG(inode->i_mode)) - goto out_f; + goto out_fmt; down(&dqopt->dqonoff_sem); if (sb_has_quota_enabled(sb, type)) { @@ -1235,7 +1363,7 @@ int vfs_quota_on(struct super_block *sb, inode->i_flags &= ~S_QUOTA; up_write(&dqopt->dqptr_sem); /* We must put dquots outside of dqptr_sem because we may need to - * start transaction for write */ + * start transaction for dquot_release() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (to_drop[cnt]) dqput(to_drop[cnt]); @@ -1262,14 +1390,58 @@ out_file_init: out_lock: up_write(&dqopt->dqptr_sem); up(&dqopt->dqonoff_sem); -out_f: - filp_close(f, NULL); out_fmt: put_quota_format(fmt); return error; } +/* Actual function called from quotactl() */ +int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +{ + struct file *f; + int error; + + f = filp_open(path, O_RDWR, 0600); + if (IS_ERR(f)) + return PTR_ERR(f); + error = security_quota_on(f); + if (error) + goto out_f; + error = vfs_quota_on_file(f, type, format_id); + if (!error) + return 0; +out_f: + filp_close(f, NULL); + return error; +} + +/* + * Function used by filesystems when filp_open() would fail (filesystem is + * being mounted now). We will use a private file structure. Caller must + * ensure that its IO functions won't need vfsmnt structure or + * some dentry tricks... 
+ */ +int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) +{ + struct file *f; + int error; + + dget(dentry); /* Get a reference for struct file */ + f = dentry_open(dentry, NULL, O_RDWR); + if (IS_ERR(f)) { + error = PTR_ERR(f); + goto out_dentry; + } + error = vfs_quota_on_file(f, type, format_id); + if (!error) + return 0; + fput(f); +out_dentry: + dput(dentry); + return error; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { @@ -1353,8 +1525,8 @@ static void do_set_dqblk(struct dquot *d clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); spin_unlock(&dq_data_lock); + mark_dquot_dirty(dquot); } int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) @@ -1411,8 +1583,10 @@ int vfs_set_dqinfo(struct super_block *s mi->dqi_igrace = ii->dqi_igrace; if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); - mark_info_dirty(mi); spin_unlock(&dq_data_lock); + mark_info_dirty(sb, type); + /* Force write to disk */ + sb->dq_op->write_info(sb, type); up(&sb_dqopt(sb)->dqonoff_sem); return 0; } @@ -1544,4 +1718,21 @@ EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_list_lock); EXPORT_SYMBOL(dq_data_lock); -EXPORT_SYMBOL(init_dquot_operations); +EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(vfs_quota_on_mount); +EXPORT_SYMBOL(vfs_quota_off); +EXPORT_SYMBOL(vfs_quota_sync); +EXPORT_SYMBOL(vfs_get_dqinfo); +EXPORT_SYMBOL(vfs_set_dqinfo); +EXPORT_SYMBOL(vfs_get_dqblk); +EXPORT_SYMBOL(vfs_set_dqblk); +EXPORT_SYMBOL(dquot_commit); +EXPORT_SYMBOL(dquot_commit_info); +EXPORT_SYMBOL(dquot_mark_dquot_dirty); +EXPORT_SYMBOL(dquot_initialize); +EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(dquot_alloc_space); +EXPORT_SYMBOL(dquot_alloc_inode); +EXPORT_SYMBOL(dquot_free_space); +EXPORT_SYMBOL(dquot_free_inode); 
+EXPORT_SYMBOL(dquot_transfer); diff -puN fs/ext3/inode.c~ext3-journalled-quotas fs/ext3/inode.c --- 25/fs/ext3/inode.c~ext3-journalled-quotas 2004-04-14 18:37:51.797139896 -0700 +++ 25-akpm/fs/ext3/inode.c 2004-04-14 18:37:51.825135640 -0700 @@ -2772,9 +2772,28 @@ int ext3_setattr(struct dentry *dentry, if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, + * inode block, ? - but truncate inode update has it) */ + handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; - if (error) + if (error) { + ext3_journal_stop(handle); return error; + } + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + error = ext3_mark_inode_dirty(handle, inode); + ext3_journal_stop(handle); } if (S_ISREG(inode->i_mode) && @@ -2853,7 +2872,9 @@ int ext3_writepage_trans_blocks(struct i ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; + /* We know that structure was already allocated during DQUOT_INIT so + * we will be updating only the data blocks + inodes */ + ret += 2*EXT3_QUOTA_TRANS_BLOCKS; #endif return ret; diff -puN fs/ext3/namei.c~ext3-journalled-quotas fs/ext3/namei.c --- 25/fs/ext3/namei.c~ext3-journalled-quotas 2004-04-14 18:37:51.799139592 -0700 +++ 25-akpm/fs/ext3/namei.c 2004-04-14 18:37:51.827135336 -0700 @@ -1631,7 +1631,8 @@ static int ext3_create (struct inode * d int err; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1661,7 +1662,8 
@@ static int ext3_mknod (struct inode * di return -EINVAL; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1693,7 +1695,8 @@ static int ext3_mkdir(struct inode * dir return -EMLINK; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1972,6 +1975,9 @@ static int ext3_rmdir (struct inode * di struct ext3_dir_entry_2 * de; handle_t *handle; + /* Initialize quotas before so that eventual writes go in + * separate transaction */ + DQUOT_INIT(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1985,7 +1991,6 @@ static int ext3_rmdir (struct inode * di handle->h_sync = 1; inode = dentry->d_inode; - DQUOT_INIT(inode); retval = -EIO; if (le32_to_cpu(de->inode) != inode->i_ino) @@ -2029,6 +2034,9 @@ static int ext3_unlink(struct inode * di struct ext3_dir_entry_2 * de; handle_t *handle; + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + DQUOT_INIT(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2042,7 +2050,6 @@ static int ext3_unlink(struct inode * di goto end_unlink; inode = dentry->d_inode; - DQUOT_INIT(inode); retval = -EIO; if (le32_to_cpu(de->inode) != inode->i_ino) @@ -2085,7 +2092,8 @@ static int ext3_symlink (struct inode * return -ENAMETOOLONG; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2170,6 +2178,10 @@ static int ext3_rename (struct inode * o old_bh = new_bh = dir_bh = NULL; + /* Initialize 
quotas before so that eventual writes go + * in separate transaction */ + if (new_dentry->d_inode) + DQUOT_INIT(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) @@ -2196,8 +2208,6 @@ static int ext3_rename (struct inode * o if (!new_inode) { brelse (new_bh); new_bh = NULL; - } else { - DQUOT_INIT(new_inode); } } if (S_ISDIR(old_inode->i_mode)) { diff -puN fs/ext3/super.c~ext3-journalled-quotas fs/ext3/super.c --- 25/fs/ext3/super.c~ext3-journalled-quotas 2004-04-14 18:37:51.803138984 -0700 +++ 25-akpm/fs/ext3/super.c 2004-04-14 18:37:51.831134728 -0700 @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include "xattr.h" #include "acl.h" @@ -504,7 +507,43 @@ static void ext3_clear_inode(struct inod # define ext3_clear_inode NULL #endif -static struct dquot_operations ext3_qops; +#ifdef CONFIG_QUOTA + +#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) + +static int ext3_dquot_initialize(struct inode *inode, int type); +static int ext3_dquot_drop(struct inode *inode); +static int ext3_write_dquot(struct dquot *dquot); +static int ext3_mark_dquot_dirty(struct dquot *dquot); +static int ext3_write_info(struct super_block *sb, int type); +static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); +static int ext3_quota_on_mount(struct super_block *sb, int type); +static int ext3_quota_off_mount(struct super_block *sb, int type); + +static struct dquot_operations ext3_quota_operations = { + .initialize = ext3_dquot_initialize, + .drop = ext3_dquot_drop, + .alloc_space = dquot_alloc_space, + .alloc_inode = dquot_alloc_inode, + .free_space = dquot_free_space, + .free_inode = dquot_free_inode, + .transfer = dquot_transfer, + .write_dquot = ext3_write_dquot, + .mark_dirty = ext3_mark_dquot_dirty, + .write_info = ext3_write_info +}; + +static 
struct quotactl_ops ext3_qctl_operations = { + .quota_on = ext3_quota_on, + .quota_off = vfs_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk +}; +#endif static struct super_operations ext3_sops = { .alloc_inode = ext3_alloc_inode, @@ -536,6 +575,8 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_ignore, Opt_err, }; @@ -571,6 +612,12 @@ static match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_ignore, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, @@ -598,13 +645,17 @@ static unsigned long get_sb_block(void * return sb_block; } -static int parse_options (char * options, struct ext3_sb_info *sbi, +static int parse_options (char * options, struct super_block *sb, unsigned long * inum, int is_remount) { + struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; substring_t args[MAX_OPT_ARGS]; int data_opt = 0; int option; +#ifdef CONFIG_QUOTA + int qtype; +#endif if (!options) return 1; @@ -763,6 +814,76 @@ static int parse_options (char * options sbi->s_mount_opt |= data_opt; } break; +#ifdef CONFIG_QUOTA + case Opt_usrjquota: + qtype = USRQUOTA; + goto set_qf_name; + case Opt_grpjquota: + qtype = GRPQUOTA; +set_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR + "EXT3-fs: Cannot change journalled " + "quota options when quota turned on.\n"); + return 0; + } + if (sbi->s_qf_names[qtype]) { 
+ printk(KERN_ERR + "EXT3-fs: %s quota file already " + "specified.\n", QTYPE2NAME(qtype)); + return 0; + } + sbi->s_qf_names[qtype] = match_strdup(&args[0]); + if (!sbi->s_qf_names[qtype]) { + printk(KERN_ERR + "EXT3-fs: not enough memory for " + "storing quotafile name.\n"); + return 0; + } + if (strchr(sbi->s_qf_names[qtype], '/')) { + printk(KERN_ERR + "EXT3-fs: quotafile must be on " + "filesystem root.\n"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + break; + case Opt_offusrjquota: + qtype = USRQUOTA; + goto clear_qf_name; + case Opt_offgrpjquota: + qtype = GRPQUOTA; +clear_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT3-fs: Cannot change " + "journalled quota options when " + "quota turned on.\n"); + return 0; + } + if (sbi->s_qf_names[qtype]) { + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + } + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; +#else + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + printk(KERN_ERR + "EXT3-fs: journalled quota options not " + "supported.\n"); + break; +#endif case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; @@ -775,6 +896,13 @@ static int parse_options (char * options return 0; } } +#ifdef CONFIG_QUOTA + if (!sbi->s_jquota_fmt && (sbi->s_qf_names[0] || sbi->s_qf_names[1])) { + printk(KERN_ERR + "EXT3-fs: journalled quota format not specified.\n"); + return 0; + } +#endif return 1; } @@ -934,6 +1062,9 @@ static void ext3_orphan_cleanup (struct { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; +#ifdef CONFIG_QUOTA + int i; +#endif if (!es->s_last_orphan) { jbd_debug(4, "no orphan inodes to clean up\n"); return; @@ -953,6 +1084,20 @@ static void ext3_orphan_cleanup (struct sb->s_id); sb->s_flags &= ~MS_RDONLY; } +#ifdef 
CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (EXT3_SB(sb)->s_qf_names[i]) { + int ret = ext3_quota_on_mount(sb, i); + if (ret < 0) + printk(KERN_ERR + "EXT3-fs: Cannot turn on journalled " + "quota: error %d\n", ret); + } + } +#endif while (es->s_last_orphan) { struct inode *inode; @@ -964,6 +1109,7 @@ static void ext3_orphan_cleanup (struct } list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + DQUOT_INIT(inode); if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %ld to %Ld bytes\n", @@ -991,6 +1137,13 @@ static void ext3_orphan_cleanup (struct if (nr_truncates) printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", sb->s_id, PLURAL(nr_truncates)); +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + ext3_quota_off_mount(sb, i); + } +#endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } @@ -1124,7 +1277,7 @@ static int ext3_fill_super (struct super sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); - if (!parse_options ((char *) data, sbi, &journal_inum, 0)) + if (!parse_options ((char *) data, sb, &journal_inum, 0)) goto failed_mount; sb->s_flags |= MS_ONE_SECOND; @@ -1303,7 +1456,10 @@ static int ext3_fill_super (struct super */ sb->s_op = &ext3_sops; sb->s_export_op = &ext3_export_ops; - sb->dq_op = &ext3_qops; +#ifdef CONFIG_QUOTA + sb->s_qcop = &ext3_qctl_operations; + sb->dq_op = &ext3_quota_operations; +#endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ sb->s_root = 0; @@ -1413,6 +1569,12 @@ failed_mount2: brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); failed_mount: +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) + kfree(sbi->s_qf_names[i]); + } +#endif ext3_blkdev_remove(sbi); brelse(bh); out_fail: @@ 
-1839,7 +2001,7 @@ int ext3_remount (struct super_block * s /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sbi, &tmp, 1)) + if (!parse_options(data, sb, &tmp, 1)) return -EINVAL; if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) @@ -1959,70 +2121,152 @@ int ext3_statfs (struct super_block * sb #ifdef CONFIG_QUOTA -/* Blocks: (2 data blocks) * (3 indirect + 1 descriptor + 1 bitmap) + superblock */ -#define EXT3_OLD_QFMT_BLOCKS 11 -/* Blocks: quota info + (4 pointer blocks + 1 entry block) * (3 indirect + 1 descriptor + 1 bitmap) + superblock */ -#define EXT3_V0_QFMT_BLOCKS 27 - -static int (*old_write_dquot)(struct dquot *dquot); -static void (*old_drop_dquot)(struct inode *inode); - -static int fmt_to_blocks(int fmt) -{ - switch (fmt) { - case QFMT_VFS_OLD: - return EXT3_OLD_QFMT_BLOCKS; - case QFMT_VFS_V0: - return EXT3_V0_QFMT_BLOCKS; - } - return EXT3_MAX_TRANS_DATA; +static inline struct inode *dquot_to_inode(struct dquot *dquot) +{ + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]->f_dentry->d_inode; } -static int ext3_write_dquot(struct dquot *dquot) +static int ext3_dquot_initialize(struct inode *inode, int type) { - int nblocks; - int ret; - int err; handle_t *handle; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - struct inode *qinode; + int ret, err; - nblocks = fmt_to_blocks(dqopt->info[dquot->dq_type].dqi_format->qf_fmt_id); - qinode = dqopt->files[dquot->dq_type]->f_dentry->d_inode; - handle = ext3_journal_start(qinode, nblocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = old_write_dquot(dquot); + /* We may create quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_initialize(inode, type); err = ext3_journal_stop(handle); - if (ret == 0) + if (!ret) ret = err; -out: return ret; } -static void ext3_drop_dquot(struct inode *inode) +static int 
ext3_dquot_drop(struct inode *inode) { - int nblocks, type; - struct quota_info *dqopt = sb_dqopt(inode->i_sb); handle_t *handle; + int ret, err; - for (type = 0; type < MAXQUOTAS; type++) { - if (sb_has_quota_enabled(inode->i_sb, type)) - break; - } - if (type < MAXQUOTAS) - nblocks = fmt_to_blocks(dqopt->info[type].dqi_format->qf_fmt_id); + /* We may delete quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_drop(inode); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_write_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext3_journal_start(dquot_to_inode(dquot), + EXT3_QUOTA_TRANS_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit(dquot); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_mark_dquot_dirty(struct dquot * dquot) +{ + /* Are we journalling quotas? */ + if (EXT3_SB(dquot->dq_sb)->s_qf_names[0] || + EXT3_SB(dquot->dq_sb)->s_qf_names[1]) + return ext3_write_dquot(dquot); else - nblocks = 0; /* No quota => no drop */ - handle = ext3_journal_start(inode, 2*nblocks); + return dquot_mark_dquot_dirty(dquot); +} + +static int ext3_write_info(struct super_block *sb, int type) +{ + int ret, err; + handle_t *handle; + + /* Data block + inode block */ + handle = ext3_journal_start(sb->s_root->d_inode, 2); if (IS_ERR(handle)) - return; - old_drop_dquot(inode); - ext3_journal_stop(handle); - return; + return PTR_ERR(handle); + ret = dquot_commit_info(sb, type); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +/* + * Turn on quotas during mount time - we need to find + * the quota file and such... 
+ */ +static int ext3_quota_on_mount(struct super_block *sb, int type) +{ + int err; + struct dentry *dentry; + struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type], + .hash = 0, + .len = strlen(EXT3_SB(sb)->s_qf_names[type])}; + + dentry = lookup_hash(&name, sb->s_root); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry); + if (err) + dput(dentry); + /* We keep the dentry reference if everything went ok - we drop it + * on quota_off time */ + return err; +} + +/* Turn quotas off during mount time */ +static int ext3_quota_off_mount(struct super_block *sb, int type) +{ + int err; + struct dentry *dentry; + + dentry = sb_dqopt(sb)->files[type]->f_dentry; + err = vfs_quota_off_mount(sb, type); + /* We invalidate dentry - it has at least wrong hash... */ + d_invalidate(dentry); + dput(dentry); + return err; +} + +/* + * Standard function to be called on quota_on + */ +static int ext3_quota_on(struct super_block *sb, int type, int format_id, + char *path) +{ + int err; + struct nameidata nd; + + /* Not journalling quota? */ + if (!EXT3_SB(sb)->s_qf_names[0] && !EXT3_SB(sb)->s_qf_names[1]) + return vfs_quota_on(sb, type, format_id, path); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + return err; + /* Quotafile not on the same filesystem? */ + if (nd.mnt->mnt_sb != sb) + return -EXDEV; + /* Quotafile not of fs root? */ + if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT3-fs: Quota file not on filesystem root. " + "Journalled quota will not work.\n"); + if (!ext3_should_journal_data(nd.dentry->d_inode)) + printk(KERN_WARNING "EXT3-fs: Quota file does not have " + "data-journalling. 
Journalled quota will not work.\n"); + path_release(&nd); + return vfs_quota_on(sb, type, format_id, path); } + #endif static struct super_block *ext3_get_sb(struct file_system_type *fs_type, @@ -2047,13 +2291,6 @@ static int __init init_ext3_fs(void) err = init_inodecache(); if (err) goto out1; -#ifdef CONFIG_QUOTA - init_dquot_operations(&ext3_qops); - old_write_dquot = ext3_qops.write_dquot; - old_drop_dquot = ext3_qops.drop; - ext3_qops.write_dquot = ext3_write_dquot; - ext3_qops.drop = ext3_drop_dquot; -#endif err = register_filesystem(&ext3_fs_type); if (err) goto out; diff -puN fs/Kconfig~ext3-journalled-quotas fs/Kconfig --- 25/fs/Kconfig~ext3-journalled-quotas 2004-04-14 18:37:51.805138680 -0700 +++ 25-akpm/fs/Kconfig 2004-04-14 18:37:51.833134424 -0700 @@ -406,12 +406,15 @@ config QUOTA help If you say Y here, you will be able to set per user limits for disk usage (also called disk quotas). Currently, it works for the - ext2, ext3, and reiserfs file system. You need additional software - in order to use quota support (you can download sources from + ext2, ext3, and reiserfs file system. ext3 also supports journalled + quotas for which you don't need to run quotacheck(8) after an unclean + shutdown. You need additional software in order to use quota support + (you can download sources from ). For further details, read the Quota mini-HOWTO, available from - . Probably the quota - support is only useful for multi user systems. If unsure, say N. + , or the documentation provided + with the quota tools. Probably the quota support is only useful for + multi user systems. If unsure, say N. 
config QFMT_V1 tristate "Old quota format support" diff -puN fs/quota_v1.c~ext3-journalled-quotas fs/quota_v1.c --- 25/fs/quota_v1.c~ext3-journalled-quotas 2004-04-14 18:37:51.806138528 -0700 +++ 25-akpm/fs/quota_v1.c 2004-04-14 18:37:51.834134272 -0700 @@ -60,7 +60,7 @@ static int v1_read_dqblk(struct dquot *d v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 && dquot->dq_dqb.dqb_ihardlimit == 0 && dquot->dq_dqb.dqb_isoftlimit == 0) - dquot->dq_flags |= DQ_FAKE; + set_bit(DQ_FAKE_B, &dquot->dq_flags); dqstats.reads++; return 0; @@ -80,12 +80,7 @@ static int v1_commit_dqblk(struct dquot fs = get_fs(); set_fs(KERNEL_DS); - /* - * Note: clear the DQ_MOD flag unconditionally, - * so we don't loop forever on failure. - */ v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); - dquot->dq_flags &= ~DQ_MOD; if (dquot->dq_id == 0) { dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace; diff -puN fs/quota_v2.c~ext3-journalled-quotas fs/quota_v2.c --- 25/fs/quota_v2.c~ext3-journalled-quotas 2004-04-14 18:37:51.808138224 -0700 +++ 25-akpm/fs/quota_v2.c 2004-04-14 18:37:51.836133968 -0700 @@ -65,7 +65,7 @@ static int v2_read_file_info(struct supe set_fs(fs); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't read info structure on device %s.\n", - f->f_vfsmnt->mnt_sb->s_id); + f->f_dentry->d_sb->s_id); return -1; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); @@ -87,10 +87,12 @@ static int v2_write_file_info(struct sup ssize_t size; loff_t offset = V2_DQINFOOFF; + spin_lock(&dq_data_lock); info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + spin_unlock(&dq_data_lock); dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); dinfo.dqi_free_blk = 
cpu_to_le32(info->u.v2_i.dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); @@ -100,7 +102,7 @@ static int v2_write_file_info(struct sup set_fs(fs); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't write info structure on device %s.\n", - f->f_vfsmnt->mnt_sb->s_id); + f->f_dentry->d_sb->s_id); return -1; } return 0; @@ -173,9 +175,10 @@ static ssize_t write_blk(struct file *fi } /* Remove empty block from list and return it */ -static int get_free_dqblk(struct file *filp, struct mem_dqinfo *info) +static int get_free_dqblk(struct file *filp, int type) { dqbuf_t buf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int ret, blk; @@ -193,7 +196,7 @@ static int get_free_dqblk(struct file *f goto out_buf; blk = info->u.v2_i.dqi_blocks++; } - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); ret = blk; out_buf: freedqbuf(buf); @@ -201,8 +204,9 @@ out_buf: } /* Insert empty block to the list */ -static int put_free_dqblk(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int put_free_dqblk(struct file *filp, int type, dqbuf_t buf, uint blk) { + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -210,16 +214,17 @@ static int put_free_dqblk(struct file *f dh->dqdh_prev_free = cpu_to_le32(0); dh->dqdh_entries = cpu_to_le16(0); info->u.v2_i.dqi_free_blk = blk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); if ((err = write_blk(filp, blk, buf)) < 0) /* Some strange block. We had better leave it... 
*/ return err; return 0; } /* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int remove_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); int err; @@ -242,7 +247,7 @@ static int remove_free_dqentry(struct fi } else { info->u.v2_i.dqi_free_entry = nextblk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); } freedqbuf(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); @@ -255,9 +260,10 @@ out_buf: } /* Insert given block to the beginning of list with free entries */ -static int insert_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int insert_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -276,7 +282,7 @@ static int insert_free_dqentry(struct fi } freedqbuf(tmpbuf); info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); return 0; out_buf: freedqbuf(tmpbuf); @@ -307,7 +313,7 @@ static uint find_free_dqentry(struct dqu goto out_buf; } else { - blk = get_free_dqblk(filp, info); + blk = get_free_dqblk(filp, dquot->dq_type); if ((int)blk < 0) { *err = blk; freedqbuf(buf); @@ -315,10 +321,10 @@ static uint find_free_dqentry(struct dqu } memset(buf, 0, V2_DQBLKSIZE); info->u.v2_i.dqi_free_entry = blk; /* This is enough as block is already zeroed and entry list is empty... 
*/ - mark_info_dirty(info); + mark_info_dirty(dquot->dq_sb, dquot->dq_type); } if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ - if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) { + if ((*err = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } @@ -349,7 +355,6 @@ out_buf: static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) { struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; dqbuf_t buf; int ret = 0, newson = 0, newact = 0; u32 *ref; @@ -358,7 +363,7 @@ static int do_insert_tree(struct dquot * if (!(buf = getdqbuf())) return -ENOMEM; if (!*treeblk) { - ret = get_free_dqblk(filp, info); + ret = get_free_dqblk(filp, dquot->dq_type); if (ret < 0) goto out_buf; *treeblk = ret; @@ -392,7 +397,7 @@ static int do_insert_tree(struct dquot * ret = write_blk(filp, *treeblk, buf); } else if (newact && ret < 0) - put_free_dqblk(filp, info, buf, *treeblk); + put_free_dqblk(filp, dquot->dq_type, buf, *treeblk); out_buf: freedqbuf(buf); return ret; @@ -417,6 +422,7 @@ static int v2_write_dquot(struct dquot * ssize_t ret; struct v2_disk_dqblk ddquot; + /* dq_off is guarded by dqio_sem */ if (!dquot->dq_off) if ((ret = dq_insert_tree(dquot)) < 0) { printk(KERN_ERR "VFS: Error %Zd occurred while creating quota.\n", ret); @@ -424,7 +430,9 @@ static int v2_write_dquot(struct dquot * } filp = sb_dqopt(dquot->dq_sb)->files[type]; offset = dquot->dq_off; + spin_lock(&dq_data_lock); mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); + spin_unlock(&dq_data_lock); fs = get_fs(); set_fs(KERNEL_DS); ret = filp->f_op->write(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset); @@ -445,7 +453,6 @@ static int v2_write_dquot(struct dquot * static int free_dqentry(struct dquot *dquot, uint blk) { struct file 
*filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; struct v2_disk_dqdbheader *dh; dqbuf_t buf = getdqbuf(); int ret = 0; @@ -463,8 +470,8 @@ static int free_dqentry(struct dquot *dq dh = (struct v2_disk_dqdbheader *)buf; dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ - if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 || - (ret = put_free_dqblk(filp, info, buf, blk)) < 0) { + if ((ret = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0 || + (ret = put_free_dqblk(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't move quota data block (%u) to free list.\n", blk); goto out_buf; } @@ -473,7 +480,7 @@ static int free_dqentry(struct dquot *dq memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, sizeof(struct v2_disk_dqblk)); if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { /* Insert will write block itself */ - if ((ret = insert_free_dqentry(filp, info, buf, blk)) < 0) { + if ((ret = insert_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); goto out_buf; } @@ -494,7 +501,6 @@ out_buf: static int remove_tree(struct dquot *dquot, uint *blk, int depth) { struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; dqbuf_t buf = getdqbuf(); int ret = 0; uint newblk; @@ -518,7 +524,7 @@ static int remove_tree(struct dquot *dqu ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0); for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? 
*/ if (i == V2_DQBLKSIZE) { - put_free_dqblk(filp, info, buf, *blk); + put_free_dqblk(filp, dquot->dq_type, buf, *blk); *blk = 0; } else @@ -632,7 +638,7 @@ static int v2_read_dquot(struct dquot *d if (offset < 0) printk(KERN_ERR "VFS: Can't read quota structure for id %u.\n", dquot->dq_id); dquot->dq_off = 0; - dquot->dq_flags |= DQ_FAKE; + set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); ret = offset; } @@ -650,21 +656,24 @@ static int v2_read_dquot(struct dquot *d ret = 0; set_fs(fs); disk2memdqb(&dquot->dq_dqb, &ddquot); + if (!dquot->dq_dqb.dqb_bhardlimit && + !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && + !dquot->dq_dqb.dqb_isoftlimit) + set_bit(DQ_FAKE_B, &dquot->dq_flags); } dqstats.reads++; return ret; } -/* Commit changes of dquot to disk - it might also mean deleting it when quota became fake one and user has no blocks... */ -static int v2_commit_dquot(struct dquot *dquot) +/* Check whether dquot should not be deleted. We know we are + * the only one operating on dquot (thanks to dq_lock) */ +static int v2_release_dquot(struct dquot *dquot) { - /* We clear the flag everytime so we don't loop when there was an IO error... 
*/ - dquot->dq_flags &= ~DQ_MOD; - if (dquot->dq_flags & DQ_FAKE && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) return v2_delete_dquot(dquot); - else - return v2_write_dquot(dquot); + return 0; } static struct quota_format_ops v2_format_ops = { @@ -673,7 +682,8 @@ static struct quota_format_ops v2_format .write_file_info = v2_write_file_info, .free_file_info = NULL, .read_dqblk = v2_read_dquot, - .commit_dqblk = v2_commit_dquot, + .commit_dqblk = v2_write_dquot, + .release_dqblk = v2_release_dquot, }; static struct quota_format_type v2_quota_format = { diff -puN fs/stat.c~ext3-journalled-quotas fs/stat.c --- 25/fs/stat.c~ext3-journalled-quotas 2004-04-14 18:37:51.809138072 -0700 +++ 25-akpm/fs/stat.c 2004-04-14 18:37:51.837133816 -0700 @@ -398,6 +398,8 @@ EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { + /* Caller is here responsible for sufficient locking + * (ie. 
inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } diff -puN include/linux/ext3_fs_sb.h~ext3-journalled-quotas include/linux/ext3_fs_sb.h --- 25/include/linux/ext3_fs_sb.h~ext3-journalled-quotas 2004-04-14 18:37:51.810137920 -0700 +++ 25-akpm/include/linux/ext3_fs_sb.h 2004-04-14 18:37:51.837133816 -0700 @@ -69,6 +69,10 @@ struct ext3_sb_info { struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif }; #endif /* _LINUX_EXT3_FS_SB */ diff -puN include/linux/ext3_jbd.h~ext3-journalled-quotas include/linux/ext3_jbd.h --- 25/include/linux/ext3_jbd.h~ext3-journalled-quotas 2004-04-14 18:37:51.811137768 -0700 +++ 25-akpm/include/linux/ext3_jbd.h 2004-04-14 18:37:51.838133664 -0700 @@ -42,8 +42,9 @@ * superblock only gets updated once, of course, so don't bother * counting that again for the quota updates. 
*/ -#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ - EXT3_XATTR_TRANS_BLOCKS - 2) +#define EXT3_DATA_TRANS_BLOCKS (EXT3_SINGLEDATA_TRANS_BLOCKS + \ + EXT3_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT3_QUOTA_TRANS_BLOCKS) extern int ext3_writepage_trans_blocks(struct inode *inode); @@ -72,6 +73,19 @@ extern int ext3_writepage_trans_blocks(s #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT3_QUOTA_TRANS_BLOCKS 2 +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\ + (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3) +#else +#define EXT3_QUOTA_TRANS_BLOCKS 0 +#define EXT3_QUOTA_INIT_BLOCKS 0 +#endif + int ext3_mark_iloc_dirty(handle_t *handle, struct inode *inode, diff -puN include/linux/quota.h~ext3-journalled-quotas include/linux/quota.h --- 25/include/linux/quota.h~ext3-journalled-quotas 2004-04-14 18:37:51.813137464 -0700 +++ 25-akpm/include/linux/quota.h 2004-04-14 18:37:51.839133512 -0700 @@ -138,6 +138,10 @@ struct if_dqinfo { #include #include +/* Maximal numbers of writes for quota operation (insert/delete/update) + * (over all formats) - info block, 4 pointer blocks, data block */ +#define DQUOT_MAX_WRITES 6 + /* * Data for one user/group kept in memory */ @@ -168,22 +172,21 @@ struct mem_dqinfo { } u; }; +struct super_block; + #define DQF_MASK 0xffff /* Mask for format specific flags */ #define DQF_INFO_DIRTY_B 16 #define DQF_ANY_DQUOT_DIRTY_B 17 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ #define DQF_ANY_DQUOT_DIRTY (1 << DQF_ANY_DQUOT_DIRTY_B) /* Is any dquot dirty? 
*/ -extern inline void mark_info_dirty(struct mem_dqinfo *info) -{ - set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); -} - +extern void mark_info_dirty(struct super_block *sb, int type); #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) #define info_any_dquot_dirty(info) test_bit(DQF_ANY_DQUOT_DIRTY_B, &(info)->dqi_flags) #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) #define sb_dqopt(sb) (&(sb)->s_dquot) +#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) struct dqstats { int lookups; @@ -200,15 +203,13 @@ extern struct dqstats dqstats; #define NR_DQHASH 43 /* Just an arbitrary number */ -#define DQ_MOD_B 0 -#define DQ_BLKS_B 1 -#define DQ_INODES_B 2 -#define DQ_FAKE_B 3 - -#define DQ_MOD (1 << DQ_MOD_B) /* dquot modified since read */ -#define DQ_BLKS (1 << DQ_BLKS_B) /* uid/gid has been warned about blk limit */ -#define DQ_INODES (1 << DQ_INODES_B) /* uid/gid has been warned about inode limit */ -#define DQ_FAKE (1 << DQ_FAKE_B) /* no limits only usage */ +#define DQ_MOD_B 0 /* dquot modified since read */ +#define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ +#define DQ_INODES_B 2 /* uid/gid has been warned about inode limit */ +#define DQ_FAKE_B 3 /* no limits only usage */ +#define DQ_READ_B 4 /* dquot was read into memory */ +#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ +#define DQ_WAITFREE_B 6 /* dquot being waited (by invalidate_dquots) */ struct dquot { struct list_head dq_hash; /* Hash list in memory */ @@ -216,8 +217,7 @@ struct dquot { struct list_head dq_free; /* Free list element */ struct semaphore dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ - - /* fields after this point are cleared when invalidating */ + wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ struct super_block *dq_sb; /* superblock this applies to */ unsigned int dq_id; /* ID this applies to (uid, gid) */ loff_t dq_off; /* Offset of dquot on disk 
*/ @@ -238,19 +238,22 @@ struct quota_format_ops { int (*write_file_info)(struct super_block *sb, int type); /* Write main info about file */ int (*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ - int (*commit_dqblk)(struct dquot *dquot); /* Write (or delete) structure for one user */ + int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ + int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ }; /* Operations working with dquots */ struct dquot_operations { - void (*initialize) (struct inode *, int); - void (*drop) (struct inode *); + int (*initialize) (struct inode *, int); + int (*drop) (struct inode *); int (*alloc_space) (struct inode *, qsize_t, int); int (*alloc_inode) (const struct inode *, unsigned long); - void (*free_space) (struct inode *, qsize_t); - void (*free_inode) (const struct inode *, unsigned long); + int (*free_space) (struct inode *, qsize_t); + int (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); - int (*write_dquot) (struct dquot *); + int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ + int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ + int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ }; /* Operations handling requests from userspace */ @@ -289,10 +292,7 @@ struct quota_info { }; /* Inline would be better but we need to dereference super_block which is not defined yet */ -#define mark_dquot_dirty(dquot) do {\ - set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)->info[(dquot)->dq_type].dqi_flags));\ - set_bit(DQ_MOD_B, &(dquot)->dq_flags);\ -} while (0) +int mark_dquot_dirty(struct dquot *dquot); #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) @@ -304,7 +304,6 @@ struct quota_info { int register_quota_format(struct 
quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); -void init_dquot_operations(struct dquot_operations *fsdqops); struct quota_module_name { int qm_fmt_id; diff -puN include/linux/quotaops.h~ext3-journalled-quotas include/linux/quotaops.h --- 25/include/linux/quotaops.h~ext3-journalled-quotas 2004-04-14 18:37:51.814137312 -0700 +++ 25-akpm/include/linux/quotaops.h 2004-04-14 18:37:51.840133360 -0700 @@ -22,16 +22,31 @@ */ extern void sync_dquots(struct super_block *sb, int type); -extern void dquot_initialize(struct inode *inode, int type); -extern void dquot_drop(struct inode *inode); +extern int dquot_initialize(struct inode *inode, int type); +extern int dquot_drop(struct inode *inode); -extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); +extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); +extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); -extern void dquot_free_space(struct inode *inode, qsize_t number); -extern void dquot_free_inode(const struct inode *inode, unsigned long number); +extern int dquot_free_space(struct inode *inode, qsize_t number); +extern int dquot_free_inode(const struct inode *inode, unsigned long number); -extern int dquot_transfer(struct inode *inode, struct iattr *iattr); +extern int dquot_transfer(struct inode *inode, struct iattr *iattr); +extern int dquot_commit(struct dquot *dquot); +extern int dquot_acquire(struct dquot *dquot); +extern int dquot_release(struct dquot *dquot); +extern int dquot_commit_info(struct super_block *sb, int type); +extern int dquot_mark_dquot_dirty(struct dquot *dquot); + +extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); +extern int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry); +extern int vfs_quota_off(struct super_block *sb, int type); +#define 
vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) +extern int vfs_quota_sync(struct super_block *sb, int type); +extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); /* * Operations supported for diskquotas. @@ -42,6 +57,8 @@ extern struct quotactl_ops vfs_quotactl_ #define sb_dquot_ops (&dquot_operations) #define sb_quotactl_ops (&vfs_quotactl_ops) +/* It is better to call this function outside of any transaction as it might + * need a lot of space in journal for dquot structure allocation. */ static __inline__ void DQUOT_INIT(struct inode *inode) { BUG_ON(!inode->i_sb); @@ -49,6 +66,7 @@ static __inline__ void DQUOT_INIT(struct inode->i_sb->dq_op->initialize(inode, -1); } +/* The same as with DQUOT_INIT */ static __inline__ void DQUOT_DROP(struct inode *inode) { if (IS_QUOTAINIT(inode)) { @@ -57,6 +75,8 @@ static __inline__ void DQUOT_DROP(struct } } +/* The following allocation/freeing/transfer functions *must* be called inside + * a transaction (deadlocks possible otherwise) */ static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { @@ -137,6 +157,7 @@ static __inline__ int DQUOT_TRANSFER(str return 0; } +/* The following two functions cannot be called inside a transaction */ #define DQUOT_SYNC(sb) sync_dquots(sb, -1) static __inline__ int DQUOT_OFF(struct super_block *sb) _