From: Andrea Arcangeli I don't think we need install_swap_bdev/remove_swap_bdev anymore; we should use swap_info->bdev, not swap_bdevs[]. The swap_info already has a ->bdev field. The only point of remove_swap_bdev/install_swap_bdev was to unplug all devices as efficiently as possible, and we don't need that anymore now that the unplug function takes a page parameter. Plus, the semaphore should be a rwsem to allow parallel unplug from multiple pages. After that I don't need to take the semaphore anymore during swapon: no swapcache page with swp_type() pointing to such a bdev will be allowed until swapon is complete (SWP_ACTIVE is set a lot later, after setting p->bdev). In swapoff I only need a dummy serialization with the readers, after try_to_unuse is complete: err = try_to_unuse(type); current->flags &= ~PF_SWAPOFF; /* wait for any unplug function to finish */ down_write(&swap_unplug_sem); up_write(&swap_unplug_sem); That's all: no other locking and no install_swap_bdev/remove_swap_bdev. (Also, the swap_bdevs[] compression code was busted.) --- 25-akpm/mm/swapfile.c | 74 ++++++++++++++------------------------------ drivers/block/ll_rw_blk.c | 0 fs/buffer.c | 0 include/linux/backing-dev.h | 0 include/linux/blkdev.h | 0 include/linux/swap.h | 0 mm/readahead.c | 0 7 files changed, 25 insertions(+), 49 deletions(-) diff -puN drivers/block/ll_rw_blk.c~swap-speedups-and-fix drivers/block/ll_rw_blk.c diff -puN fs/buffer.c~swap-speedups-and-fix fs/buffer.c diff -puN include/linux/backing-dev.h~swap-speedups-and-fix include/linux/backing-dev.h diff -puN include/linux/blkdev.h~swap-speedups-and-fix include/linux/blkdev.h diff -puN include/linux/swap.h~swap-speedups-and-fix include/linux/swap.h diff -puN mm/readahead.c~swap-speedups-and-fix mm/readahead.c diff -puN mm/swapfile.c~swap-speedups-and-fix mm/swapfile.c --- 25/mm/swapfile.c~swap-speedups-and-fix 2004-05-12 21:01:25.758751368 -0700 +++ 25-akpm/mm/swapfile.c 2004-05-12 21:01:25.762750760 -0700 @@ -48,61 +48,38 @@ struct swap_info_struct 
swap_info[MAX_SW static DECLARE_MUTEX(swapon_sem); /* - * Array of backing blockdevs, for swap_unplug_fn. We need this because the - * bdev->unplug_fn can sleep and we cannot hold swap_list_lock while calling - * the unplug_fn. And swap_list_lock cannot be turned into a semaphore. + * We need this because the bdev->unplug_fn can sleep and we cannot + * hold swap_list_lock while calling the unplug_fn. And swap_list_lock + * cannot be turned into a semaphore. */ -static DECLARE_MUTEX(swap_bdevs_sem); -static struct block_device *swap_bdevs[MAX_SWAPFILES]; +static DECLARE_RWSEM(swap_unplug_sem); #define SWAPFILE_CLUSTER 256 -/* - * Caller holds swap_bdevs_sem - */ -static void install_swap_bdev(struct block_device *bdev) -{ - int i; - - for (i = 0; i < MAX_SWAPFILES; i++) { - if (swap_bdevs[i] == NULL) { - swap_bdevs[i] = bdev; - return; - } - } - BUG(); -} - -static void remove_swap_bdev(struct block_device *bdev) -{ - int i; - - for (i = 0; i < MAX_SWAPFILES; i++) { - if (swap_bdevs[i] == bdev) { - memcpy(&swap_bdevs[i], &swap_bdevs[i + 1], - (MAX_SWAPFILES - i - 1) * sizeof(*swap_bdevs)); - swap_bdevs[MAX_SWAPFILES - 1] = NULL; - return; - } - } - BUG(); -} - void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) { - int i; + swp_entry_t entry; - down(&swap_bdevs_sem); - for (i = 0; i < MAX_SWAPFILES; i++) { - struct block_device *bdev = swap_bdevs[i]; + down_read(&swap_unplug_sem); + entry.val = page->private; + if (PageSwapCache(page)) { + struct block_device *bdev = swap_info[swp_type(entry)].bdev; struct backing_dev_info *bdi; - if (bdev == NULL) - break; + /* + * If the page is removed from swapcache from under us (with a + * racy try_to_unuse/swapoff) we need an additional reference + * count to avoid reading garbage from page->private above. If + * the WARN_ON triggers during a swapoff it maybe the race + * condition and it's harmless. However if it triggers without + * swapoff it signals a problem. 
+ */ + WARN_ON(page_count(page) <= 1); + bdi = bdev->bd_inode->i_mapping->backing_dev_info; bdi->unplug_io_fn(bdi, page); } - up(&swap_bdevs_sem); + up_read(&swap_unplug_sem); } static inline int scan_swap_map(struct swap_info_struct *si) @@ -1136,6 +1113,11 @@ asmlinkage long sys_swapoff(const char _ current->flags |= PF_SWAPOFF; err = try_to_unuse(type); current->flags &= ~PF_SWAPOFF; + + /* wait for any unplug function to finish */ + down_write(&swap_unplug_sem); + up_write(&swap_unplug_sem); + if (err) { /* re-insert swap space back into swap_list */ swap_list_lock(); @@ -1154,7 +1136,6 @@ asmlinkage long sys_swapoff(const char _ goto out_dput; } down(&swapon_sem); - down(&swap_bdevs_sem); swap_list_lock(); swap_device_lock(p); swap_file = p->swap_file; @@ -1166,8 +1147,6 @@ asmlinkage long sys_swapoff(const char _ destroy_swap_extents(p); swap_device_unlock(p); swap_list_unlock(); - remove_swap_bdev(p->bdev); - up(&swap_bdevs_sem); up(&swapon_sem); vfree(swap_map); if (S_ISBLK(mapping->host->i_mode)) { @@ -1511,7 +1490,6 @@ asmlinkage long sys_swapon(const char __ goto bad_swap; down(&swapon_sem); - down(&swap_bdevs_sem); swap_list_lock(); swap_device_lock(p); p->flags = SWP_ACTIVE; @@ -1537,8 +1515,6 @@ asmlinkage long sys_swapon(const char __ } swap_device_unlock(p); swap_list_unlock(); - install_swap_bdev(p->bdev); - up(&swap_bdevs_sem); up(&swapon_sem); error = 0; goto out; _