Patch from "Theodore Ts'o" The following patch should (in theory) fix the htree/NFS readdir problems that people have reported. Specifically, it should fix the NFS looping-on-EOF problem with readdir, as well as the problems caused by converting a directory to HTREE while an NFS readdir is in progress. I'd appreciate it if people who can easily replicate these NFS/htree problems could give this patch (against BK-recent / 2.5.63) a whirl. Thanks!! fs/ext3/dir.c | 18 +++++++- fs/ext3/namei.c | 118 ++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 98 insertions(+), 38 deletions(-) diff -puN fs/ext3/dir.c~htree-nfs-fix fs/ext3/dir.c --- 25/fs/ext3/dir.c~htree-nfs-fix 2003-03-04 22:19:39.000000000 -0800 +++ 25-akpm/fs/ext3/dir.c 2003-03-04 22:19:39.000000000 -0800 @@ -103,7 +103,11 @@ static int ext3_readdir(struct file * fi sb = inode->i_sb; - if (is_dx(inode)) { +#ifdef CONFIG_EXT3_INDEX + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_INDEX) && + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) { err = ext3_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) { unlock_kernel(); @@ -115,6 +119,7 @@ static int ext3_readdir(struct file * fi */ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; } +#endif stored = 0; bh = NULL; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -434,6 +439,9 @@ static int ext3_dx_readdir(struct file * filp->private_data = info; } + if (filp->f_pos == -1) + return 0; /* EOF */ + /* Some one has messed with f_pos; reset the world */ if (info->last_pos != filp->f_pos) { free_rb_tree_fname(&info->root); @@ -470,8 +478,10 @@ static int ext3_dx_readdir(struct file * &info->next_hash); if (ret < 0) return ret; - if (ret == 0) + if (ret == 0) { + filp->f_pos = -1; break; + } info->curr_node = rb_first(&info->root); } @@ -483,6 +493,10 @@ static int ext3_dx_readdir(struct file * info->curr_node = rb_next(info->curr_node); if 
(!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = -1; + break; + } info->curr_hash = info->next_hash; info->curr_minor_hash = 0; } diff -puN fs/ext3/namei.c~htree-nfs-fix fs/ext3/namei.c --- 25/fs/ext3/namei.c~htree-nfs-fix 2003-03-04 22:19:39.000000000 -0800 +++ 25-akpm/fs/ext3/namei.c 2003-03-04 22:19:39.000000000 -0800 @@ -170,7 +170,7 @@ static struct ext3_dir_entry_2* dx_pack_ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, int *err, + struct dx_frame *frames, __u32 *start_hash); static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, struct ext3_dir_entry_2 **res_dir, int *err); @@ -239,6 +239,17 @@ static inline unsigned dx_node_limit (st * Debug */ #ifdef DX_DEBUG +static void dx_show_index (char * label, struct dx_entry *entries) +{ + int i, n = dx_get_count (entries); + printk("%s index ", label); + for (i = 0; i < n; i++) + { + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); + } + printk("\n"); +} + struct stats { unsigned names; @@ -447,22 +458,21 @@ static void dx_release (struct dx_frame * * This function returns 1 if the caller should continue to search, * or 0 if it should not. If there is an error reading one of the - * index blocks, it will return -1. + * index blocks, it will return a negative error code. * * If start_hash is non-null, it will be filled in with the starting * hash of the next page. */ static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, int *err, + struct dx_frame *frames, __u32 *start_hash) { struct dx_frame *p; struct buffer_head *bh; - int num_frames = 0; + int err, num_frames = 0; __u32 bhash; - *err = ENOENT; p = frame; /* * Find the next leaf page by incrementing the frame pointer. 
@@ -500,8 +510,8 @@ static int ext3_htree_next_block(struct */ while (num_frames--) { if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), - 0, err))) - return -1; /* Failure */ + 0, &err))) + return err; /* Failure */ p++; brelse (p->bh); p->bh = bh; @@ -521,6 +531,46 @@ static inline struct ext3_dir_entry_2 *e /* * This function fills a red-black tree with information from a + * directory block. It returns the number of directory entries loaded + * into the tree. If there is an error, the error code is returned. + */ +static int htree_dirblock_to_tree(struct file *dir_file, + struct inode *dir, int block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash) +{ + struct buffer_head *bh; + struct ext3_dir_entry_2 *de, *top; + int err, count = 0; + + dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); + if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + return err; + + de = (struct ext3_dir_entry_2 *) bh->b_data; + top = (struct ext3_dir_entry_2 *) ((char *) de + + dir->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(0)); + for (; de < top; de = ext3_next_entry(de)) { + ext3fs_dirhash(de->name, de->name_len, hinfo); + if ((hinfo->hash < start_hash) || + ((hinfo->hash == start_hash) && + (hinfo->minor_hash < start_minor_hash))) + continue; + if ((err = ext3_htree_store_dirent(dir_file, + hinfo->hash, hinfo->minor_hash, de)) != 0) { + brelse(bh); + return err; + } + count++; + } + brelse(bh); + return count; +} + + +/* + * This function fills a red-black tree with information from a * directory. We start scanning the directory in hash order, starting * at start_hash and start_minor_hash. 
* @@ -531,8 +581,7 @@ int ext3_htree_fill_tree(struct file *di __u32 start_minor_hash, __u32 *next_hash) { struct dx_hash_info hinfo; - struct buffer_head *bh; - struct ext3_dir_entry_2 *de, *top; + struct ext3_dir_entry_2 *de; struct dx_frame frames[2], *frame; struct inode *dir; int block, err; @@ -543,6 +592,14 @@ int ext3_htree_fill_tree(struct file *di dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = dir_file->f_dentry->d_inode; + if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { + hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; + count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, + start_hash, start_minor_hash); + *next_hash = ~0; + return count; + } hinfo.hash = start_hash; hinfo.minor_hash = 0; frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); @@ -562,34 +619,21 @@ int ext3_htree_fill_tree(struct file *di while (1) { block = dx_get_block(frame->at); - dxtrace(printk("Reading block %d\n", block)); - if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, + start_hash, start_minor_hash); + if (ret < 0) { + err = ret; goto errout; - - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) { - ext3fs_dirhash(de->name, de->name_len, &hinfo); - if ((hinfo.hash < start_hash) || - ((hinfo.hash == start_hash) && - (hinfo.minor_hash < start_minor_hash))) - continue; - if ((err = ext3_htree_store_dirent(dir_file, - hinfo.hash, hinfo.minor_hash, de)) != 0) { - brelse(bh); - goto errout; - } - count++; } - brelse (bh); - hashval = ~1; + count += ret; + hashval = ~0; ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, - frame, frames, &err, &hashval); - if (next_hash) - *next_hash = hashval; - if (ret == -1) + frame, frames, &hashval); + *next_hash = 
hashval; + if (ret < 0) { + err = ret; goto errout; + } /* * Stop if: (a) there are no more entries, or * (b) we have inserted at least one entry and the @@ -600,7 +644,8 @@ int ext3_htree_fill_tree(struct file *di break; } dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries\n", count)); + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + count, *next_hash)); return count; errout: dx_release(frames); @@ -909,11 +954,12 @@ static struct buffer_head * ext3_dx_find brelse (bh); /* Check to see if we should continue to search */ retval = ext3_htree_next_block(dir, hash, frame, - frames, err, 0); - if (retval == -1) { + frames, 0); + if (retval < 0) { ext3_warning(sb, __FUNCTION__, "error reading index page in directory #%lu", dir->i_ino); + *err = retval; goto errout; } } while (retval == 1); _