Fix up the ramdisk driver so it mostly works. - Mark ramdisk-backed address_spaces as not accounting for dirty memory: keeping the ramdisk inode pages dirty is the only thing which protects them from page reclaim and we don't want them permanently elevating the dirty page stats. This means that the files which live atop the ramdisk device also don't contribute to dirty memory. That's unfortunate, because they should. Can't do much about this because the ramdisk inode and those files share the same backing_dev_info. But regular kupdate writeout, vmscan writepage and sync() will all correctly force the file pages into the ramdisk inode pagecache. We could fix this up by adding a (new) blockdev.file_backing_dev_info and using that in alloc_inode instead of always adopting the blockdev inode's backing_dev_info. Or a blockdev->prepare_inode() or whatever. We'll see. - We don't need the ramdisk special-case in fs-writeback.c any more. - Fix rd_ioctl() to return -ENOTTY on invalid ioctl types, not -EINVAL. - Make ramdisk Kconfig friendlier. 
--- 25-akpm/drivers/block/Kconfig | 7 ++-- 25-akpm/drivers/block/rd.c | 59 +++++++++++++++++++++++++++++++++++------- 25-akpm/fs/fs-writeback.c | 5 --- 3 files changed, 54 insertions(+), 17 deletions(-) diff -puN drivers/block/rd.c~ramdisk-fixes drivers/block/rd.c --- 25/drivers/block/rd.c~ramdisk-fixes 2004-05-17 21:53:59.728849864 -0700 +++ 25-akpm/drivers/block/rd.c 2004-05-17 23:25:50.480088336 -0700 @@ -57,6 +57,8 @@ #include /* for invalidate_bdev() */ #include #include +#include + #include /* The RAM disk size is now a parameter */ @@ -119,20 +121,48 @@ static int ramdisk_prepare_write(struct kunmap_atomic(kaddr, KM_USER0); SetPageUptodate(page); } - SetPageDirty(page); return 0; } static int ramdisk_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) { + set_page_dirty(page); + return 0; +} + +/* + * ->writepage to the blockdev's mapping has to redirty the page so that the + * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM + * won't try to (pointlessly) write the page again for a while. + * + * Really, these pages should not be on the LRU at all. + */ +static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) +{ + redirty_page_for_writepage(wbc, page); + if (wbc->for_reclaim) + return WRITEPAGE_ACTIVATE; + unlock_page(page); + return 0; +} + +/* + * ramdisk blockdev pages have their own ->set_page_dirty() because we don't + * want them to contribute to dirty memory accounting. 
+ */ +static int ramdisk_set_page_dirty(struct page *page) +{ + SetPageDirty(page); return 0; } static struct address_space_operations ramdisk_aops = { - .readpage = ramdisk_readpage, - .prepare_write = ramdisk_prepare_write, - .commit_write = ramdisk_commit_write, + .readpage = ramdisk_readpage, + .prepare_write = ramdisk_prepare_write, + .commit_write = ramdisk_commit_write, + .writepage = ramdisk_writepage, + .set_page_dirty = ramdisk_set_page_dirty, }; static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, @@ -195,11 +225,11 @@ static int rd_blkdev_pagecache_IO(int rw if (rw == READ) { flush_dcache_page(vec->bv_page); } else { - SetPageDirty(page); + set_page_dirty(page); } if (unlock) unlock_page(page); - __free_page(page); + put_page(page); } while (size); out: @@ -251,7 +281,7 @@ static int rd_ioctl(struct inode *inode, struct block_device *bdev = inode->i_bdev; if (cmd != BLKFLSBUF) - return -EINVAL; + return -ENOTTY; /* * special: we want to release the ramdisk memory, it's not like with @@ -268,11 +298,22 @@ static int rd_ioctl(struct inode *inode, return error; } +/* + * The backing_dev_info is shared between files which are backed by the ramdisk + * inode and by the ramdisk inode itself. This is a bit unfortunate because + * they really want separate semantics. The files *do* want full writeback + * and dirty-memory accounting treatment, whereas the ramdisk blockdev mapping + * wants neither. + * + * So we make things look like a regular blockdev and then cheat in various ways + * in the ramdisk inode's a_ops. 
+ */ + static struct backing_dev_info rd_backing_dev_info = { .ra_pages = 0, /* No readahead */ .no_dirty_acct = 1, /* Does not contribute to dirty memory */ - .no_writeback = 1, /* Does not need writeback */ - .unplug_io_fn = default_unplug_io_fn, + .no_writeback = 0, /* DOES need writeback */ + .unplug_io_fn = default_unplug_io_fn, }; static int rd_open(struct inode *inode, struct file *filp) diff -puN fs/fs-writeback.c~ramdisk-fixes fs/fs-writeback.c --- 25/fs/fs-writeback.c~ramdisk-fixes 2004-05-17 21:53:59.729849712 -0700 +++ 25-akpm/fs/fs-writeback.c 2004-05-17 21:53:59.735848800 -0700 @@ -310,11 +310,6 @@ sync_sb_inodes(struct super_block *sb, s list_move(&inode->i_list, &sb->s_dirty); continue; } - /* - * Assume that all inodes on this superblock are memory - * backed. Skip the superblock. - */ - break; } if (wbc->nonblocking && bdi_write_congested(bdi)) { diff -puN drivers/block/Kconfig~ramdisk-fixes drivers/block/Kconfig --- 25/drivers/block/Kconfig~ramdisk-fixes 2004-05-17 21:53:59.730849560 -0700 +++ 25-akpm/drivers/block/Kconfig 2004-05-17 21:53:59.736848648 -0700 @@ -314,12 +314,13 @@ config BLK_DEV_RAM thus say N here. config BLK_DEV_RAM_SIZE - int "Default RAM disk size" + int "Default RAM disk size (kbytes)" depends on BLK_DEV_RAM default "4096" help - The default value is 4096. Only change this if you know what are - you doing. If you are using IBM S/390, then set this to 8192. + The default value is 4096 kilobytes. Only change this if you know + what you are doing. If you are using IBM S/390, then set this to + 8192. config BLK_DEV_INITRD bool "Initial RAM disk (initrd) support" _