Date: Sat, 22 Sep 2001 01:54:10 -0400 (EDT)
From: Alexander Viro <viro@math.psu.edu>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: Andrea Arcangeli <andrea@suse.de>
Subject: [PATCH] (1/6) further block_device cleanups
In-Reply-To: <20010922062525.O11674@athlon.random>
Message-ID: <Pine.GSO.4.21.0109220140390.11204-100000@weyl.math.psu.edu>
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII

	OK, folks.  Since it seems to survive the local beating (FWIW),
here is the beginning of the sequence.  I'll send the first 6 patches -
see for yourself how much you want to apply at the first stage.

Part 1:
	Inodes with a given ->i_bdev sit on a cyclic list,
anchored in bdev->bd_inodes and going through inode->i_devices.
	bd_acquire(inode) either bumps the refcount on an already-set
->i_bdev or sets ->i_bdev (grabbing the block_device).
	bd_forget(inode) takes the inode off the list and resets ->i_bdev.
It is called when we free the inode.
	If bdput(bdev) decides to free the block_device (->bd_count hits 0),
it goes through the list (if non-empty) and removes all inodes from it,
resetting their ->i_bdev as it goes.
	bd_acquire() is used in fs/devices.c and devfs instead of
setting ->i_bdev directly.

	Now we can start surrounding the areas where we need ->i_bdev
with bd_acquire()/bdput().  Once that's done we can drop bd_acquire()
from init_special_inode() and bdput() from inode freeing, thus reducing
the lifetime of a block_device to the time it spends actually in use.  That's
what will happen in the next several chunks.

diff -urN S10-pre13-new/fs/block_dev.c S10-pre13-current/fs/block_dev.c
--- S10-pre13-new/fs/block_dev.c	Fri Sep 21 16:31:16 2001
+++ S10-pre13-current/fs/block_dev.c	Fri Sep 21 18:44:01 2001
@@ -437,6 +437,7 @@
 	{
 		memset(bdev, 0, sizeof(*bdev));
 		sema_init(&bdev->bd_sem, 1);
+		INIT_LIST_HEAD(&bdev->bd_inodes);
 	}
 }
 
@@ -522,16 +523,57 @@
 
 void bdput(struct block_device *bdev)
 {
-	if (atomic_dec_and_test(&bdev->bd_count)) {
+	if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
+		struct list_head *p;
 		if (bdev->bd_openers)
 			BUG();
 		if (bdev->bd_cache_openers)
 			BUG();
-		spin_lock(&bdev_lock);
 		list_del(&bdev->bd_hash);
+		while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
+			struct inode *inode;
+			inode = list_entry(p, struct inode, i_devices);
+			list_del_init(p);
+			inode->i_bdev = NULL;
+		}
 		spin_unlock(&bdev_lock);
 		destroy_bdev(bdev);
 	}
+}
+ 
+int bd_acquire(struct inode *inode)
+{
+	struct block_device *bdev;
+	spin_lock(&bdev_lock);
+	if (inode->i_bdev) {
+		atomic_inc(&inode->i_bdev->bd_count);
+		spin_unlock(&bdev_lock);
+		return 0;
+	}
+	spin_unlock(&bdev_lock);
+	bdev = bdget(kdev_t_to_nr(inode->i_rdev));
+	if (!bdev)
+		return -ENOMEM;
+	spin_lock(&bdev_lock);
+	if (!inode->i_bdev) {
+		inode->i_bdev = bdev;
+		list_add(&inode->i_devices, &bdev->bd_inodes);
+	} else if (inode->i_bdev != bdev)
+		BUG();
+	spin_unlock(&bdev_lock);
+	return 0;
+}
+
+/* Call when you free inode */
+
+void bd_forget(struct inode *inode)
+{
+	spin_lock(&bdev_lock);
+	if (inode->i_bdev) {
+		list_del_init(&inode->i_devices);
+		inode->i_bdev = NULL;
+	}
+	spin_unlock(&bdev_lock);
 }
 
 static struct {
diff -urN S10-pre13-new/fs/devfs/base.c S10-pre13-current/fs/devfs/base.c
--- S10-pre13-new/fs/devfs/base.c	Fri Sep 21 16:31:16 2001
+++ S10-pre13-current/fs/devfs/base.c	Fri Sep 21 18:45:02 2001
@@ -2351,9 +2351,7 @@
     {
 	inode->i_rdev = MKDEV (de->u.fcb.u.device.major,
 			       de->u.fcb.u.device.minor);
-	inode->i_bdev = bdget ( kdev_t_to_nr (inode->i_rdev) );
-	inode->i_mapping->a_ops = &def_blk_aops;
-	if (inode->i_bdev)
+	if (bd_acquire(inode) == 0)
 	{
 	    if (!inode->i_bdev->bd_op && de->u.fcb.ops)
 		inode->i_bdev->bd_op = de->u.fcb.ops;
diff -urN S10-pre13-new/fs/devices.c S10-pre13-current/fs/devices.c
--- S10-pre13-new/fs/devices.c	Thu May 24 18:26:44 2001
+++ S10-pre13-current/fs/devices.c	Fri Sep 21 18:44:01 2001
@@ -207,7 +207,7 @@
 	} else if (S_ISBLK(mode)) {
 		inode->i_fop = &def_blk_fops;
 		inode->i_rdev = to_kdev_t(rdev);
-		inode->i_bdev = bdget(rdev);
+		bd_acquire(inode);
 	} else if (S_ISFIFO(mode))
 		inode->i_fop = &def_fifo_fops;
 	else if (S_ISSOCK(mode))
diff -urN S10-pre13-new/fs/inode.c S10-pre13-current/fs/inode.c
--- S10-pre13-new/fs/inode.c	Fri Sep 21 09:45:26 2001
+++ S10-pre13-current/fs/inode.c	Fri Sep 21 18:44:01 2001
@@ -106,6 +106,7 @@
 		INIT_LIST_HEAD(&inode->i_dentry);
 		INIT_LIST_HEAD(&inode->i_dirty_buffers);
 		INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
+		INIT_LIST_HEAD(&inode->i_devices);
 		sema_init(&inode->i_sem, 1);
 		sema_init(&inode->i_zombie, 1);
 		spin_lock_init(&inode->i_data.i_shared_lock);
@@ -518,7 +519,7 @@
 		inode->i_sb->s_op->clear_inode(inode);
 	if (inode->i_bdev) {
 		bdput(inode->i_bdev);
-		inode->i_bdev = NULL;
+		bd_forget(inode);
 	}
 	if (inode->i_cdev) {
 		cdput(inode->i_cdev);
diff -urN S10-pre13-new/include/linux/fs.h S10-pre13-current/include/linux/fs.h
--- S10-pre13-new/include/linux/fs.h	Fri Sep 21 09:45:29 2001
+++ S10-pre13-current/include/linux/fs.h	Fri Sep 21 18:44:01 2001
@@ -415,6 +415,7 @@
 	int			bd_cache_openers;
 	const struct block_device_operations *bd_op;
 	struct semaphore	bd_sem;	/* open/close mutex */
+	struct list_head	bd_inodes;
 };
 
 struct inode {
@@ -452,6 +453,7 @@
 	int			i_mapping_overload;
 	struct dquot		*i_dquot[MAXQUOTAS];
 	/* These three should probably be a union */
+	struct list_head	i_devices;
 	struct pipe_inode_info	*i_pipe;
 	struct block_device	*i_bdev;
 	struct char_device	*i_cdev;
@@ -1046,6 +1048,8 @@
 extern int register_blkdev(unsigned int, const char *, struct block_device_operations *);
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
+extern int bd_acquire(struct inode *inode);
+extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct char_device *cdget(dev_t);
 extern void cdput(struct char_device *);