From: Mingming Cao ext3_rsv_mount.patch: Adds features on top of the ext3_rsv_base.patch: - deal with the earlier bogus -ENOSPC error - do block reservation only for regular files - make the ext3 reservation feature a mount option: new mount option added: reservation - A pair of file ioctl commands is added for applications to control the block reservation window size. --- 25-akpm/fs/ext3/balloc.c | 61 ++++++++++++++++++++++++++++---------- 25-akpm/fs/ext3/ialloc.c | 2 - 25-akpm/fs/ext3/inode.c | 2 - 25-akpm/fs/ext3/ioctl.c | 20 ++++++++++++ 25-akpm/fs/ext3/super.c | 20 +++++++++++- 25-akpm/include/linux/ext3_fs.h | 42 ++++++++++++++------------ 25-akpm/include/linux/ext3_fs_i.h | 2 - 7 files changed, 111 insertions(+), 38 deletions(-) diff -puN fs/ext3/balloc.c~ext3_rsv_mount fs/ext3/balloc.c --- 25/fs/ext3/balloc.c~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/fs/ext3/balloc.c Wed Apr 14 17:05:45 2004 @@ -714,7 +714,7 @@ static int alloc_new_reservation(struct else start_block = goal + group_first_block; - size = my_rsv->rsv_goal_size; + size = atomic_read(&my_rsv->rsv_goal_size); /* if we have a old reservation, discard it first */ if (!rsv_is_empty(my_rsv)) { /* @@ -862,7 +862,16 @@ ext3_try_to_allocate_with_rsv(struct sup return -1; } -#ifdef EXT3_RESERVATION + /* + * we don't deal with reservation when + * filesystem is mounted without reservation + * or the file is not a regular file + * or the last attempt at allocating a block with reservation turned on failed + */ + if (my_rsv == NULL ) { + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); + goto out; + } rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; /* * goal is a group relative block number (if there is a goal) @@ -906,10 +915,7 @@ ext3_try_to_allocate_with_rsv(struct sup if (ret >= 0) break; /* succeed */ } -#else - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); -#endif - +out: if (ret >= 0) { BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " "bitmap block");
@@ -937,12 +943,13 @@ ext3_try_to_allocate_with_rsv(struct sup int ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, int *errp) { - struct buffer_head *bitmap_bh = NULL; /* bh */ - struct buffer_head *gdp_bh; /* bh2 */ - int group_no; /* i */ - int ret_block; /* j */ - int bgi; /* blockgroup iteration index */ - int target_block; /* tmp */ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; + int group_no; + int goal_group; + int ret_block; + int bgi; /* blockgroup iteration index */ + int target_block; int fatal = 0, err; int performed_allocation = 0; int free_blocks, root_blocks; @@ -950,7 +957,7 @@ int ext3_new_block(handle_t *handle, str struct ext3_group_desc *gdp; struct ext3_super_block *es; struct ext3_sb_info *sbi; - struct reserve_window *my_rsv = &EXT3_I(inode)->i_rsv_window; + struct reserve_window *my_rsv = NULL; #ifdef EXT3FS_DEBUG static int goal_hits, goal_attempts; #endif @@ -972,7 +979,10 @@ int ext3_new_block(handle_t *handle, str sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; ext3_debug("goal=%lu.\n", goal); - +#ifdef EXT3_RESERVATION + if (test_opt(sb, RESERVATION) && S_ISREG(inode->i_mode)) + my_rsv = &EXT3_I(inode)->i_rsv_window; +#endif free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && @@ -994,6 +1004,8 @@ int ext3_new_block(handle_t *handle, str if (!gdp) goto io_error; + goal_group = group_no; +retry: free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); if (free_blocks > 0) { ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % @@ -1037,7 +1049,26 @@ int ext3_new_block(handle_t *handle, str if (ret_block >= 0) goto allocated; } - +#ifdef EXT3_RESERVATION + /* + * We may end up with a bogus earlier ENOSPC error because the + * filesystem is "full" of reservations, but + * there may indeed be free blocks available on disk. + * In this case, we just forget about the
reservations + * just do block allocation as without reservations. + */ + if (my_rsv) { +#ifdef EXT3_RESERVATION_DEBUG + printk("filesystem is fully reserved. Actual free blocks: %d. " + "Try to do allocation without reservation, goal_group " + "is %d\n", + free_blocks, goal_group); +#endif + my_rsv = NULL; + group_no = goal_group; + goto retry; + } +#endif /* No space left on the device */ *errp = -ENOSPC; goto out; diff -puN fs/ext3/ialloc.c~ext3_rsv_mount fs/ext3/ialloc.c --- 25/fs/ext3/ialloc.c~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/fs/ext3/ialloc.c Wed Apr 14 17:05:45 2004 @@ -583,7 +583,7 @@ got: ei->i_dtime = 0; ei->i_rsv_window.rsv_start = 0; ei->i_rsv_window.rsv_end= 0; - ei->i_rsv_window.rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; + atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS); INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list); ei->i_block_group = group; diff -puN fs/ext3/inode.c~ext3_rsv_mount fs/ext3/inode.c --- 25/fs/ext3/inode.c~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/fs/ext3/inode.c Wed Apr 14 17:05:45 2004 @@ -2450,7 +2450,7 @@ void ext3_read_inode(struct inode * inod ei->i_block_group = iloc.block_group; ei->i_rsv_window.rsv_start = 0; ei->i_rsv_window.rsv_end= 0; - ei->i_rsv_window.rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; + atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS); INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list); /* * NOTE! 
The in-memory inode i_data array is in little-endian order diff -puN fs/ext3/ioctl.c~ext3_rsv_mount fs/ext3/ioctl.c --- 25/fs/ext3/ioctl.c~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/fs/ext3/ioctl.c Wed Apr 14 17:05:45 2004 @@ -20,6 +20,7 @@ int ext3_ioctl (struct inode * inode, st { struct ext3_inode_info *ei = EXT3_I(inode); unsigned int flags; + unsigned short rsv_window_size; ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); @@ -151,6 +152,25 @@ flags_err: return ret; } #endif +#ifdef EXT3_RESERVATION + case EXT3_IOC_GETRSVSZ: + rsv_window_size = atomic_read(&ei->i_rsv_window.rsv_goal_size); + return put_user(rsv_window_size, (int *)arg); + case EXT3_IOC_SETRSVSZ: + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(rsv_window_size, (int *)arg)) + return -EFAULT; + + if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; + atomic_set(&ei->i_rsv_window.rsv_goal_size, rsv_window_size); + return 0; +#endif default: return -ENOTTY; } diff -puN fs/ext3/super.c~ext3_rsv_mount fs/ext3/super.c --- 25/fs/ext3/super.c~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/fs/ext3/super.c Wed Apr 14 17:05:45 2004 @@ -572,7 +572,8 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, @@ -604,6 +605,8 @@ static match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_reservation, "reservation"}, + 
{Opt_noreservation, "noreservation"}, {Opt_noload, "noload"}, {Opt_commit, "commit=%u"}, {Opt_journal_update, "journal=update"}, @@ -757,6 +760,19 @@ static int parse_options (char * options printk("EXT3 (no)acl options not supported\n"); break; #endif +#ifdef EXT3_RESERVATION + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_noreservation: + clear_opt(sbi->s_mount_opt, RESERVATION); + break; +#else + case Opt_reservation: + case Opt_noreservation: + printk("EXT3 block reservation options not supported\n"); + break; +#endif case Opt_journal_update: /* @@@ FIXME */ /* Eventually we will want to be able to create @@ -1456,7 +1472,7 @@ static int ext3_fill_super (struct super INIT_LIST_HEAD(&sbi->s_rsv_window_head.rsv_list); sbi->s_rsv_window_head.rsv_start = 0; sbi->s_rsv_window_head.rsv_end = 0; - sbi->s_rsv_window_head.rsv_goal_size = 0; + atomic_set(&sbi->s_rsv_window_head.rsv_goal_size, 0); /* * set up enough so that it can read an inode diff -puN include/linux/ext3_fs.h~ext3_rsv_mount include/linux/ext3_fs.h --- 25/include/linux/ext3_fs.h~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/include/linux/ext3_fs.h Wed Apr 14 17:05:45 2004 @@ -37,6 +37,7 @@ struct statfs; */ #define EXT3_RESERVATION #define EXT3_DEFAULT_RESERVE_BLOCKS 8 +#define EXT3_MAX_RESERVE_BLOCKS 1024 /* * Always enable hashed directories */ @@ -207,6 +208,10 @@ struct ext3_group_desc #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif +#ifdef EXT3_RESERVATION +#define EXT3_IOC_GETRSVSZ _IOR('r', 1, long) +#define EXT3_IOC_SETRSVSZ _IOW('r', 2, long) +#endif /* * Structure of an inode on the disk @@ -305,24 +310,25 @@ struct ext3_inode { /* * Mount flags */ -#define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */ -#define EXT3_MOUNT_OLDALLOC 0x0002 /* Don't use the new Orlov allocator */ -#define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */ -#define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages 
*/ -#define EXT3_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ -#define EXT3_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ -#define EXT3_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define EXT3_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ -#define EXT3_MOUNT_NOLOAD 0x0100 /* Don't use existing journal*/ -#define EXT3_MOUNT_ABORT 0x0200 /* Fatal error detected */ -#define EXT3_MOUNT_DATA_FLAGS 0x0C00 /* Mode for data writes: */ - #define EXT3_MOUNT_JOURNAL_DATA 0x0400 /* Write data to journal */ - #define EXT3_MOUNT_ORDERED_DATA 0x0800 /* Flush data before commit */ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ -#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ -#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT3_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT3_MOUNT_NO_UID32 0x02000 /* 
Disable 32-bit UIDs */ +#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff -puN include/linux/ext3_fs_i.h~ext3_rsv_mount include/linux/ext3_fs_i.h --- 25/include/linux/ext3_fs_i.h~ext3_rsv_mount Wed Apr 14 17:05:45 2004 +++ 25-akpm/include/linux/ext3_fs_i.h Wed Apr 14 17:05:45 2004 @@ -22,7 +22,7 @@ struct reserve_window { struct list_head rsv_list; __u32 rsv_start; __u32 rsv_end; - unsigned short rsv_goal_size; + atomic_t rsv_goal_size; }; /* _