This is my fourth attempt to patch the isofs code. It is similar to the last posting except this one implements the NFS get_parent() method, which has always been missing. The original problem I set out to address is that the current iso9660 file system cannot reach inodes located beyond the 4GB barrier. This is caused by using the inode number as the byte offset of the inode data. Being 32 bits wide, the inode number is unable to reach inode data that does not reside on the first 4GB of the file system. This causes real problems with "growisofs" (http://fy.chalmers.se/~appro/linux/DVD+RW/#isofs4gb) and my pet project "shunt" (http://www.serice.net/shunt/). This patch switches the isofs code from iget() to iget5_locked(), which allows extra data to be passed into isofs_read_inode() so that inode data anywhere on the disk can be reached. The inode number scheme was also changed. Continuing to use the byte offset would have resulted in non-unique inodes in many common situations, but because the inode number no longer plays any role in reading the meta-data off the disk, I was free to set the inode number to some unique characteristic of the file. I have chosen to use the block offset, which is also 32 bits wide. Lastly, the pre-patch code uses the default export_operations to handle accessing the file system through NFS. The problem with this is that the default NFS operations assume that iget() works, which is no longer the case because of the necessity of switching to iget5_locked(). So, I had to implement the NFS operations too. As a bonus, I went ahead and implemented the NFS get_parent() method, which has always been missing. 
Signed-off-by: Andrew Morton --- 25-akpm/fs/isofs/export.c | 89 +++++++++++++++++++++++++++++++++++++++++ 25-akpm/fs/isofs/inode.c | 9 ++-- 25-akpm/fs/isofs/namei.c | 5 +- 25-akpm/include/linux/iso_fs.h | 58 ++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 6 deletions(-) diff -puN fs/isofs/export.c~iso9660-inodes-beyond-4gb-fixes fs/isofs/export.c --- 25/fs/isofs/export.c~iso9660-inodes-beyond-4gb-fixes 2004-06-01 21:52:38.410676984 -0700 +++ 25-akpm/fs/isofs/export.c 2004-06-01 21:52:38.418675768 -0700 @@ -13,6 +13,7 @@ * fs/exportfs/expfs.c. */ +#include #include #include #include @@ -55,6 +56,93 @@ isofs_export_get_dentry(struct super_blo return isofs_export_iget(sb, block, offset, generation); } +/* This function is surprisingly simple. The trick is understanding + * that "child" is always a directory. So, to find its parent, you + * simply need to find its ".." entry, normalize its block and offset, + * and return the underlying inode. See the comments for + * isofs_normalize_block_and_offset(). */ +static struct dentry *isofs_export_get_parent(struct dentry *child) +{ + unsigned long parent_block = 0; + unsigned long parent_offset = 0; + struct inode *child_inode = child->d_inode; + struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); + struct inode *parent_inode = NULL; + struct iso_directory_record *de = NULL; + struct buffer_head * bh = NULL; + struct dentry *rv = NULL; + + /* "child" must always be a directory. */ + if (!S_ISDIR(child_inode->i_mode)) { + printk(KERN_ERR "isofs: isofs_export_get_parent(): " + "child is not a directory!\n"); + rv = ERR_PTR(-EACCES); + goto out; + } + + /* It is an invariant that the directory offset is zero. If + * it is not zero, it means the directory failed to be + * normalized for some reason. 
*/ + if (e_child_inode->i_iget5_offset != 0) { + printk(KERN_ERR "isofs: isofs_export_get_parent(): " + "child directory not normalized!\n"); + rv = ERR_PTR(-EACCES); + goto out; + } + + /* The child inode has been normalized such that its + * i_iget5_block value points to the "." entry. Fortunately, + * the ".." entry is located in the same block. */ + parent_block = e_child_inode->i_iget5_block; + + /* Get the block in question. */ + bh = sb_bread(child_inode->i_sb, parent_block); + if (bh == NULL) { + rv = ERR_PTR(-EACCES); + goto out; + } + + /* This is the "." entry. */ + de = (struct iso_directory_record*)bh->b_data; + + /* The ".." entry is always the second entry. */ + parent_offset = (unsigned long)isonum_711(de->length); + de = (struct iso_directory_record*)(bh->b_data + parent_offset); + + /* Verify it is in fact the ".." entry. */ + if ((isonum_711(de->name_len) != 1) || (de->name[0] != 1)) { + printk(KERN_ERR "isofs: Unable to find the \"..\" " + "directory for NFS.\n"); + rv = ERR_PTR(-EACCES); + goto out; + } + + /* Normalize */ + isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); + + /* Get the inode. */ + parent_inode = isofs_iget(child_inode->i_sb, + parent_block, + parent_offset); + if (parent_inode == NULL) { + rv = ERR_PTR(-EACCES); + goto out; + } + + /* Allocate the dentry. 
*/ + rv = d_alloc_anon(parent_inode); + if (rv == NULL) { + rv = ERR_PTR(-ENOMEM); + goto out; + } + + out: + if (bh) { + brelse(bh); + } + return rv; +} + static int isofs_export_encode_fh(struct dentry *dentry, __u32 *fh32, @@ -139,4 +227,5 @@ struct export_operations isofs_export_op .decode_fh = isofs_export_decode_fh, .encode_fh = isofs_export_encode_fh, .get_dentry = isofs_export_get_dentry, + .get_parent = isofs_export_get_parent, }; diff -puN fs/isofs/inode.c~iso9660-inodes-beyond-4gb-fixes fs/isofs/inode.c --- 25/fs/isofs/inode.c~iso9660-inodes-beyond-4gb-fixes 2004-06-01 21:52:38.412676680 -0700 +++ 25-akpm/fs/isofs/inode.c 2004-06-01 21:52:38.420675464 -0700 @@ -8,6 +8,7 @@ * 1997 Gordon Chaffee - Joliet CDs * 1998 Eric Lammerts - ISO 9660 Level 3 * 2004 Paul Serice - Comprehensive Inode Scheme + * 2004 Paul Serice - NFS Export Operations */ #include @@ -1407,10 +1408,10 @@ static int isofs_iget5_set(struct inode return 0; } -/* Store the block and block offset in the inode's containing - * structure and sets the inode number (used to compute the hash - * value) to the lower 32-bits of the absolute offset. The code below - * is otherwise similar to the iget() code in include/linux/fs.h */ +/* Store, in the inode's containing structure, the block and block + * offset that point to the underlying meta-data for the inode. 
The + * code below is otherwise similar to the iget() code in + * include/linux/fs.h */ struct inode *isofs_iget(struct super_block *sb, unsigned long block, unsigned long offset) diff -puN fs/isofs/namei.c~iso9660-inodes-beyond-4gb-fixes fs/isofs/namei.c --- 25/fs/isofs/namei.c~iso9660-inodes-beyond-4gb-fixes 2004-06-01 21:52:38.413676528 -0700 +++ 25-akpm/fs/isofs/namei.c 2004-06-01 21:52:38.420675464 -0700 @@ -152,9 +152,12 @@ isofs_find_entry(struct inode *dir, stru match = (isofs_cmp(dentry,dpnt,dlen) == 0); } if (match) { - if (bh) brelse(bh); + isofs_normalize_block_and_offset(de, + &block_saved, + &offset_saved); *block_rv = block_saved; *offset_rv = offset_saved; + if (bh) brelse(bh); return 1; } } diff -puN include/linux/iso_fs.h~iso9660-inodes-beyond-4gb-fixes include/linux/iso_fs.h --- 25/include/linux/iso_fs.h~iso9660-inodes-beyond-4gb-fixes 2004-06-01 21:52:38.415676224 -0700 +++ 25-akpm/include/linux/iso_fs.h 2004-06-01 21:52:38.421675312 -0700 @@ -235,9 +235,65 @@ extern struct inode *isofs_iget(struct s unsigned long block, unsigned long offset); +/* Because the inode number is no longer relevant to finding the + * underlying meta-data for an inode, we are free to choose a more + * convenient 32-bit number as the inode number. Because directories + * and files are block aligned (except in a few very unusual cases) + * and because blocks are limited to 32-bits, I've chosen the starting + * block that holds the file or directory data as the inode number. + * + * One nice side effect of this is that you can use "ls -i" to get the + * inode number which will tell you exactly where you need to start a + * hex dump if you want to see the contents of the directory or + * file. 
*/ static inline unsigned long isofs_get_ino(struct iso_directory_record *d) { - return (unsigned long)isonum_733(d->extent); + return (unsigned long)isonum_733(d->extent) + + (unsigned long)isonum_711(d->ext_attr_length); +} + +/* Every directory can have many redundant directory entries scattered + * throughout the directory tree. First there is the directory entry + * with the name of the directory stored in the parent directory. + * Then, there is the "." directory entry stored in the directory + * itself. Finally, there are possibly many ".." directory entries + * stored in all the subdirectories. + * + * In order for the NFS get_parent() method to work and for the + * general consistency of the dcache, we need to make sure the + * "i_iget5_block" and "i_iget5_offset" all point to exactly one of + * the many redundant entries for each directory. We normalize the + * block and offset by always making them point to the "." directory. + * + * Notice that we do not use the entry for the directory with the name + * that is located in the parent directory. Even though choosing this + * first directory is more natural, it is much easier to find the "." + * entry in the NFS get_parent() method because it is implicitly + * encoded in the "extent + ext_attr_length" fields of _all_ the + * redundant entries for the directory. Thus, it can always be + * reached regardless of which directory entry you have in hand. + * + * This works because the "." entry is simply the first directory + * record when you start reading the file that holds all the directory + * records, and this file starts at "extent + ext_attr_length" blocks. + * Because the "." entry is always the first entry listed in the + * directories file, the normalized "offset" value is always 0. + * + * You should pass the directory entry in "de". On return, "block" + * and "offset" will hold normalized values. Only directories are + * affected making it safe to call even for non-directory file + * types. 
*/ +static void inline +isofs_normalize_block_and_offset(struct iso_directory_record* de, + unsigned long *block, + unsigned long *offset) +{ + /* Only directories are normalized. */ + if (de->flags[0] & 2) { + *offset = 0; + *block = (unsigned long)isonum_733(de->extent) + + (unsigned long)isonum_711(de->ext_attr_length); + } } extern struct inode_operations isofs_dir_inode_operations; _