From: Hans Reiser This patch introduces filesystem kobject. It allows a filesystem to register kobjects and to export its information via sysfs. Reiser4 is using it to show statistics and perform online tunning Signed-off-by: Andrew Morton --- 25-akpm/fs/super.c | 203 ++++++++++++++++++++++++++++++++++++++++++++- 25-akpm/include/linux/fs.h | 66 ++++++++++++++ 2 files changed, 268 insertions(+), 1 deletion(-) diff -puN fs/super.c~reiser4-kobject-umount-race fs/super.c --- 25/fs/super.c~reiser4-kobject-umount-race 2004-12-03 20:56:58.288898928 -0800 +++ 25-akpm/fs/super.c 2004-12-03 20:56:58.294898016 -0800 @@ -69,6 +69,7 @@ static struct super_block *alloc_super(v INIT_LIST_HEAD(&s->s_io); INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); + INIT_LIST_HEAD(&s->s_entries); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); @@ -103,6 +104,205 @@ static inline void destroy_super(struct kfree(s); } +/* struct sb_entry obmanipulations. */ + +/** + * register_sb_entry - attach sb_entry to a superblock + * @s: superblock to attach to + * @entry: entry to attach to the @s + * + * Attaches @entry to the superblock, marks it as live. Called under + * write-taken @s->s_umount. + */ +static void register_sb_entry(struct super_block *s, struct sb_entry *entry) +{ + entry->live = 1; + list_add(&entry->linkage, &s->s_entries); + entry->type = s->s_type; + get_filesystem(entry->type); + entry->s = s; +} + +/** + * unregister_sb_entry - detach sb_entry from superblock + * @entry: entry to detach + * + * Detaches @entry from the superblock, and marks it as dead. Called + * under write-taken @entry->s->s_umount. + */ +static void unregister_sb_entry(struct sb_entry *entry) +{ + entry->live = 0; + list_del_init(&entry->linkage); + if (entry->type != NULL) { + put_filesystem(entry->type); + entry->type = NULL; + } +} + +/* helper (*test) callback for get_sb_entry. Should probably be just declared + * local in get_sb_entry(). */ +static int test_sb(struct super_block *sb, void *data) +{ + return data == sb; +} + +/* helper (*set) callback for get_sb_entry. */ +static int set_sb(struct super_block *sb, void *data) +{ + return -ENOENT; +} + +/** + * get_sb_entry - acquire a reference to sb_entry + * @entry: sb_entry to acquire reference to + * + * Acquires a reference to the @entry. This means that after successful + * return from this function @entry (and @entry->s) are guaranteed to + * exist until corresponding call to @put_sb_entry. This function is used + * to avoid races between sysfs/procfs methods and umount. Fails if + * sget() failed or @entry->s is already unmounted. + */ +static int get_sb_entry(struct sb_entry *entry) +{ + int result; + struct super_block *s; + + s = sget(entry->type, test_sb, set_sb, entry->s); + if (IS_ERR(s)) + return PTR_ERR(s); + /* at this point @s is pinned, and s->s_umount is write-taken */ + result = entry->live ? 0 : -ENOENT; + up_write(&s->s_umount); + if (result != 0) + deactivate_super(s); + return result; +} + +/** + * put_sb_entry - release a reference to sb_entry + * @entry: sb_entry to release reference to + * + * Dual to get_sb_entry(), should always be called if former returned + * success. + */ +static void put_sb_entry(struct sb_entry *entry) +{ + /* release reference acquired by get_sb_entry() */ + deactivate_super(entry->s); +} + +/* helper to deactivate_super(): detach all sb_entries from @s */ +static void kill_sb_entries(struct super_block *s) +{ + while (!list_empty(&s->s_entries)) + unregister_sb_entry(container_of(s->s_entries.next, + struct sb_entry, linkage)); +} + +/** + * fs_kobject_register - register file-system kobject + * @s: superblock @fskobj belongs to + * @fskobj: file-system kobject to register + * + * Registers kobject and sb_entry parts of + * @fskobj. Cf. include/linux/fs.h:struct fs_kobject + */ +int fs_kobject_register(struct super_block *s, struct fs_kobject * fskobj) +{ + int result; + + result = kobject_register(&fskobj->kobj); + if (result == 0) + register_sb_entry(s, &fskobj->entry); + return result; +} +EXPORT_SYMBOL(fs_kobject_register); + +/** + * fs_kobject_register - register file-system kobject + * @fskobj: file-system kobject to register + * + * Unregisters kobject and sb_entry parts of @fskobj. + */ +void fs_kobject_unregister(struct fs_kobject * fskobj) +{ + unregister_sb_entry(&fskobj->entry); + kobject_unregister(&fskobj->kobj); +} +EXPORT_SYMBOL(fs_kobject_unregister); + +/** + * fs_attr_show - ->show method for fs_attr_ops. + * @kobj: kobject embedded into struct fs_kobject + * @kattr: kattr embedded into struct fs_kattr + * + * Calls ->show() method of fs_kattr into which @kattr is + * embedded. Avoids races with umount. + */ +static ssize_t +fs_attr_show(struct kobject *kobj, struct attribute *kattr, char *buf) +{ + struct fs_kobject *fskobj; + struct fs_kattr *fskattr; + struct sb_entry *entry; + int result; + + fskobj = container_of(kobj, struct fs_kobject, kobj); + fskattr = container_of(kattr, struct fs_kattr, kattr); + entry = &fskobj->entry; + + result = get_sb_entry(entry); + if (result == 0) { + if (fskattr->show != NULL) + result = fskattr->show(entry->s, fskobj, fskattr, buf); + else + result = 0; + put_sb_entry(entry); + } + return result; +} + +/** + * fs_attr_store - ->store method for fs_attr_ops. + * @kobj: kobject embedded into struct fs_kobject + * @kattr: kattr embedded into struct fs_kattr + * + * Calls ->store() method of fs_kattr into which @kattr is + * embedded. Avoids races with umount. + */ +static ssize_t +fs_attr_store(struct kobject *kobj, struct attribute *kattr, + const char *buf, size_t size) +{ + struct fs_kobject *fskobj; + struct fs_kattr *fskattr; + struct sb_entry *entry; + int result; + + fskobj = container_of(kobj, struct fs_kobject, kobj); + fskattr = container_of(kattr, struct fs_kattr, kattr); + entry = &fskobj->entry; + + result = get_sb_entry(entry); + if (result == 0) { + if (fskattr->store != NULL) + result = fskattr->store(entry->s, + fskobj, fskattr, buf, size); + else + result = 0; + put_sb_entry(entry); + } + return result; +} + +/* sysfs operations for file systems exporting information in /sys/fs */ +struct sysfs_ops fs_attr_ops = { + .show = fs_attr_show, + .store = fs_attr_store +}; +EXPORT_SYMBOL(fs_attr_ops); + /* Superblock refcounting */ /* @@ -172,6 +372,7 @@ void deactivate_super(struct super_block s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); down_write(&s->s_umount); + kill_sb_entries(s); fs->kill_sb(s); put_filesystem(fs); put_super(s); @@ -444,7 +645,7 @@ rescan: } EXPORT_SYMBOL(get_super); - + struct super_block * user_get_super(dev_t dev) { struct list_head *p; diff -puN include/linux/fs.h~reiser4-kobject-umount-race include/linux/fs.h --- 25/include/linux/fs.h~reiser4-kobject-umount-race 2004-12-03 20:56:58.290898624 -0800 +++ 25-akpm/include/linux/fs.h 2004-12-03 20:56:58.296897712 -0800 @@ -755,6 +755,28 @@ extern int send_sigurg(struct fown_struc extern struct list_head super_blocks; extern spinlock_t sb_lock; +/* + * sb_entry is attached to some super block (register_sb_entry()). It + * can be detached manually (unregister_sb_entry()), or automatically by + * umount (see deactivate_super()). {get,put}_sb_entry() can be used to + * guarantee liveness of entry. Typical usage is to have sb_entry along + * with kobject or proc_dir_entry; see struct fs_kobject. + */ + +struct sb_entry { + /* linkage into &s->entries */ + struct list_head linkage; + /* true is entry is attached to @s */ + int live; + /* file system type for this entry. This is necessary, because + * entry liveness should be checkable in the situation when ->s + * may already be invalid. */ + struct file_system_type *type; + /* superblock this entry is attached to. Can only be inspected + * after successful call to get_sb_entry() */ + struct super_block *s; +}; + #define sb_entry(list) list_entry((list), struct super_block, s_list) #define S_BIAS (1<<30) struct super_block { @@ -808,8 +830,52 @@ struct super_block { /* Granuality of c/m/atime in ns. Cannot be worse than a second */ u32 s_time_gran; + struct list_head s_entries; /* list of + * sb_entries. Protected + * by ->s_umount. */ +}; + +/* + * file system kobject. + * + * fs_kobject is used to export per-super-block information in sysfs + * while providing synchronization against concurrent umount. To this + * end it includes struct sb_entry that is attached to the super block + * by fs_kobject_register() and detached by fs_kobject_unregister(). + * + * We need these wrappers (see fs/super.c:fs_kattr_{show,store}()), + * because it's impossible to handle module unloading races properly + * from within file-system code. Viz get_sb_entry() avoids umount races + * by acquiring reference to the super block (through sget()), but this + * may very well be the _last_ reference to the file-system, and + * put_sb_entry() will trigger module unload, which means that + * put_sb_entry() should be called from the generic code rather than + * from file-system module. + */ +struct fs_kobject { + struct sb_entry entry; + struct kobject kobj; /* it should be kobj for compatibility + * with silly kobject.h macros */ }; +/* + * file system kattr. See struct fs_kobject. + */ +struct fs_kattr { + struct attribute kattr; + ssize_t (*show) (struct super_block *, + struct fs_kobject *, struct fs_kattr *, char *); + ssize_t (*store) (struct super_block *, struct fs_kobject *, + struct fs_kattr *, const char *, size_t); +}; + +/* in fs/super.c */ + +int fs_kobject_register(struct super_block *s, struct fs_kobject *fskobj); +void fs_kobject_unregister(struct fs_kobject *fskobj); + +extern struct sysfs_ops fs_attr_ops; + extern struct timespec current_fs_time(struct super_block *sb); /* _