Date: Sat, 22 Sep 2001 01:54:10 -0400 (EDT) From: Alexander Viro To: Linus Torvalds Cc: Andrea Arcangeli Subject: [PATCH] (1/6) further block_device cleanups In-Reply-To: <20010922062525.O11674@athlon.random> Message-ID: MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII OK, folks. Since it seems to survive the local beating (FWIW), here is the beginning of the sequence. I'll send the first 6 patches - see for yourself how much you want to apply on the first stage. Part 1: inodes with a given ->i_bdev sit on a cyclic list, anchored in bdev->bd_inodes and going through inode->i_devices. bd_acquire(inode) either bumps the refcount on ->i_bdev or sets ->i_bdev (grabbing the block_device). bd_forget(inode) takes the inode off the list and resets ->i_bdev; it is called when we free the inode. If bdput(bdev) decides to free the block_device (->bd_count hits 0), it goes through the list (if non-empty) and removes all inodes from it, resetting their ->i_bdev as it goes. bd_acquire() is used in fs/devices.c and devfs instead of setting ->i_bdev directly. Now we can start surrounding the areas where we need ->i_bdev with bd_acquire()/bdput(). Once that's done we can drop bd_acquire() from init_special_inode() and bdput() from inode freeing, thus reducing the lifetime of a block_device to the time it is actually in use. That's what will happen in the next several chunks. 
diff -urN S10-pre13-new/fs/block_dev.c S10-pre13-current/fs/block_dev.c --- S10-pre13-new/fs/block_dev.c Fri Sep 21 16:31:16 2001 +++ S10-pre13-current/fs/block_dev.c Fri Sep 21 18:44:01 2001 @@ -437,6 +437,7 @@ { memset(bdev, 0, sizeof(*bdev)); sema_init(&bdev->bd_sem, 1); + INIT_LIST_HEAD(&bdev->bd_inodes); } } @@ -522,16 +523,57 @@ void bdput(struct block_device *bdev) { - if (atomic_dec_and_test(&bdev->bd_count)) { + if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) { + struct list_head *p; if (bdev->bd_openers) BUG(); if (bdev->bd_cache_openers) BUG(); - spin_lock(&bdev_lock); list_del(&bdev->bd_hash); + while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { + struct inode *inode; + inode = list_entry(p, struct inode, i_devices); + list_del_init(p); + inode->i_bdev = NULL; + } spin_unlock(&bdev_lock); destroy_bdev(bdev); } +} + +int bd_acquire(struct inode *inode) +{ + struct block_device *bdev; + spin_lock(&bdev_lock); + if (inode->i_bdev) { + atomic_inc(&inode->i_bdev->bd_count); + spin_unlock(&bdev_lock); + return 0; + } + spin_unlock(&bdev_lock); + bdev = bdget(kdev_t_to_nr(inode->i_rdev)); + if (!bdev) + return -ENOMEM; + spin_lock(&bdev_lock); + if (!inode->i_bdev) { + inode->i_bdev = bdev; + list_add(&inode->i_devices, &bdev->bd_inodes); + } else if (inode->i_bdev != bdev) + BUG(); + spin_unlock(&bdev_lock); + return 0; +} + +/* Call when you free inode */ + +void bd_forget(struct inode *inode) +{ + spin_lock(&bdev_lock); + if (inode->i_bdev) { + list_del_init(&inode->i_devices); + inode->i_bdev = NULL; + } + spin_unlock(&bdev_lock); } static struct { diff -urN S10-pre13-new/fs/devfs/base.c S10-pre13-current/fs/devfs/base.c --- S10-pre13-new/fs/devfs/base.c Fri Sep 21 16:31:16 2001 +++ S10-pre13-current/fs/devfs/base.c Fri Sep 21 18:45:02 2001 @@ -2351,9 +2351,7 @@ { inode->i_rdev = MKDEV (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); - inode->i_bdev = bdget ( kdev_t_to_nr (inode->i_rdev) ); - inode->i_mapping->a_ops = &def_blk_aops; - if 
(inode->i_bdev) + if (bd_acquire(inode) == 0) { if (!inode->i_bdev->bd_op && de->u.fcb.ops) inode->i_bdev->bd_op = de->u.fcb.ops; diff -urN S10-pre13-new/fs/devices.c S10-pre13-current/fs/devices.c --- S10-pre13-new/fs/devices.c Thu May 24 18:26:44 2001 +++ S10-pre13-current/fs/devices.c Fri Sep 21 18:44:01 2001 @@ -207,7 +207,7 @@ } else if (S_ISBLK(mode)) { inode->i_fop = &def_blk_fops; inode->i_rdev = to_kdev_t(rdev); - inode->i_bdev = bdget(rdev); + bd_acquire(inode); } else if (S_ISFIFO(mode)) inode->i_fop = &def_fifo_fops; else if (S_ISSOCK(mode)) diff -urN S10-pre13-new/fs/inode.c S10-pre13-current/fs/inode.c --- S10-pre13-new/fs/inode.c Fri Sep 21 09:45:26 2001 +++ S10-pre13-current/fs/inode.c Fri Sep 21 18:44:01 2001 @@ -106,6 +106,7 @@ INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_dirty_buffers); INIT_LIST_HEAD(&inode->i_dirty_data_buffers); + INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); sema_init(&inode->i_zombie, 1); spin_lock_init(&inode->i_data.i_shared_lock); @@ -518,7 +519,7 @@ inode->i_sb->s_op->clear_inode(inode); if (inode->i_bdev) { bdput(inode->i_bdev); - inode->i_bdev = NULL; + bd_forget(inode); } if (inode->i_cdev) { cdput(inode->i_cdev); diff -urN S10-pre13-new/include/linux/fs.h S10-pre13-current/include/linux/fs.h --- S10-pre13-new/include/linux/fs.h Fri Sep 21 09:45:29 2001 +++ S10-pre13-current/include/linux/fs.h Fri Sep 21 18:44:01 2001 @@ -415,6 +415,7 @@ int bd_cache_openers; const struct block_device_operations *bd_op; struct semaphore bd_sem; /* open/close mutex */ + struct list_head bd_inodes; }; struct inode { @@ -452,6 +453,7 @@ int i_mapping_overload; struct dquot *i_dquot[MAXQUOTAS]; /* These three should probably be a union */ + struct list_head i_devices; struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct char_device *i_cdev; @@ -1046,6 +1048,8 @@ extern int register_blkdev(unsigned int, const char *, struct block_device_operations *); extern int unregister_blkdev(unsigned int, 
const char *); extern struct block_device *bdget(dev_t); +extern int bd_acquire(struct inode *inode); +extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct char_device *cdget(dev_t); extern void cdput(struct char_device *);