diff -urN blkdev-pagecache-ref/drivers/block/rd.c blkdev-pagecache/drivers/block/rd.c --- blkdev-pagecache-ref/drivers/block/rd.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/drivers/block/rd.c Thu Jul 12 07:32:25 2001 @@ -186,6 +186,79 @@ #endif +static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +{ + struct address_space * mapping = rd_inode[minor]->i_mapping; + unsigned long index; + int offset, size, err = 0; + + if (sbh->b_page->mapping == mapping) { + if (rw != READ) + SetPageDirty(sbh->b_page); + goto out; + } + + index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; + size = sbh->b_size; + + do { + int count; + struct page ** hash; + struct page * page; + const char * src; + char * dst; + int unlock = 0; + + count = PAGE_CACHE_SIZE - offset; + if (count > size) + count = size; + size -= count; + + hash = page_hash(mapping, index); + page = __find_get_page(mapping, index, hash); + if (!page && rw != READ) { + page = grab_cache_page(mapping, index); + err = -ENOMEM; + if (!page) + goto out; + err = 0; + unlock = 1; + } + + index++; + if (!page) { + offset = 0; + continue; + } + + if (rw == READ) { + src = kmap(page); + src += offset; + dst = bh_kmap(sbh); + } else { + dst = kmap(page); + dst += offset; + src = bh_kmap(sbh); + } + offset = 0; + + memcpy(dst, src, count); + + kunmap(page); + bh_kunmap(sbh); + + if (rw != READ) + SetPageDirty(page); + if (unlock) + UnlockPage(page); + __free_page(page); + } while (size); + + out: + return err; +} + /* * Basically, my strategy here is to set up a buffer-head which can't be * deleted, and make that my Ramdisk. If the request is outside of the @@ -198,10 +271,7 @@ { unsigned int minor; unsigned long offset, len; - struct buffer_head *rbh; - char *bdata; - minor = MINOR(sbh->b_rdev); if (minor >= NUM_RAMDISKS) @@ -221,20 +291,8 @@ goto fail; } - rbh = getblk(sbh->b_rdev, sbh->b_rsector/(sbh->b_size>>9), sbh->b_size); - /* I think that it is safe to assume that rbh is not in HighMem, though - * sbh might be - NeilBrown - */ - bdata = bh_kmap(sbh); - if (rw == READ) { - if (sbh != rbh) - memcpy(bdata, rbh->b_data, rbh->b_size); - } else - if (sbh != rbh) - memcpy(rbh->b_data, bdata, rbh->b_size); - bh_kunmap(sbh); - mark_buffer_protected(rbh); - brelse(rbh); + if (rd_blkdev_pagecache_IO(rw, sbh, minor)) + goto fail; sbh->b_end_io(sbh,1); return 0; @@ -259,10 +317,21 @@ /* special: we want to release the ramdisk memory, it's not like with the other blockdevices where this ioctl only flushes away the buffer cache. 
*/ - if ((atomic_read(&inode->i_bdev->bd_openers) > 2)) - return -EBUSY; - destroy_buffers(inode->i_rdev); - rd_blocksizes[minor] = 0; + { + struct block_device * bdev = inode->i_bdev; + + down(&bdev->bd_sem); + if (bdev->bd_openers > 2) { + up(&bdev->bd_sem); + return -EBUSY; + } + bdev->bd_openers--; + bdev->bd_cache_openers--; + iput(rd_inode[minor]); + rd_inode[minor] = NULL; + rd_blocksizes[minor] = rd_blocksize; + up(&bdev->bd_sem); + } break; case BLKGETSIZE: /* Return device size */ @@ -302,21 +371,16 @@ { extern void free_initrd_mem(unsigned long, unsigned long); - lock_kernel(); if (!--initrd_users) { - blkdev_put(inode->i_bdev, BDEV_FILE); - iput(inode); free_initrd_mem(initrd_start, initrd_end); initrd_start = 0; } - unlock_kernel(); return 0; } static struct file_operations initrd_fops = { read: initrd_read, - release: initrd_release, }; #endif @@ -326,9 +390,15 @@ { #ifdef CONFIG_BLK_DEV_INITRD if (DEVICE_NR(inode->i_rdev) == INITRD_MINOR) { + static struct block_device_operations initrd_bd_op = { + open: rd_open, + release: initrd_release, + }; + if (!initrd_start) return -ENODEV; initrd_users++; filp->f_op = &initrd_fops; + inode->i_bdev->bd_op = &initrd_bd_op; return 0; } #endif @@ -341,8 +411,14 @@ */ if (rd_inode[DEVICE_NR(inode->i_rdev)] == NULL) { if (!inode->i_bdev) return -ENXIO; - if ((rd_inode[DEVICE_NR(inode->i_rdev)] = igrab(inode)) != NULL) - atomic_inc(&rd_inode[DEVICE_NR(inode->i_rdev)]->i_bdev->bd_openers); + if ((rd_inode[DEVICE_NR(inode->i_rdev)] = igrab(inode)) != NULL) { + struct block_device *bdev = inode->i_bdev; + + /* bdev->bd_sem is held by caller */ + bdev->bd_openers++; + bdev->bd_cache_openers++; + bdev->bd_inode = inode; + } } MOD_INC_USE_COUNT; @@ -356,7 +432,7 @@ return 0; } -static struct block_device_operations fd_fops = { +static struct block_device_operations rd_bd_op = { open: rd_open, release: rd_release, ioctl: rd_ioctl, @@ -371,7 +447,13 @@ for (i = 0 ; i < NUM_RAMDISKS; i++) { if (rd_inode[i]) { /* withdraw invalidate_buffers() and prune_icache() immunity */ - atomic_dec(&rd_inode[i]->i_bdev->bd_openers); + struct block_device *bdev = rd_inode[i]->i_bdev; + + down(&bdev->bd_sem); + bdev->bd_openers--; + bdev->bd_cache_openers--; + up(&bdev->bd_sem); + /* remove stale pointer to module address space */ rd_inode[i]->i_bdev->bd_op = NULL; iput(rd_inode[i]); @@ -400,7 +482,7 @@ rd_blocksize = BLOCK_SIZE; } - if (register_blkdev(MAJOR_NR, "ramdisk", &fd_fops)) { + if (register_blkdev(MAJOR_NR, "ramdisk", &rd_bd_op)) { printk("RAMDISK: Could not get major %d", MAJOR_NR); return -EIO; } @@ -418,14 +500,14 @@ devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS, DEVFS_FL_DEFAULT, MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR, - &fd_fops, NULL); + &rd_bd_op, NULL); for (i = 0; i < NUM_RAMDISKS; i++) - register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &fd_fops, rd_size<<1); + register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &rd_bd_op, rd_size<<1); #ifdef CONFIG_BLK_DEV_INITRD /* We ought to separate initrd operations here */ - register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &fd_fops, rd_size<<1); + register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); #endif hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ @@ -594,8 +676,10 @@ outfile.f_op = &def_blk_fops; init_special_inode(out_inode, S_IFBLK | S_IRUSR | S_IWUSR, kdev_t_to_nr(ram_device)); - if (blkdev_open(inode, &infile) != 0) + if (blkdev_open(inode, &infile) != 0) { + iput(out_inode); goto free_inode; + } if (blkdev_open(out_inode, &outfile) != 0) goto 
free_inodes; @@ -658,14 +742,15 @@ if (i && (i % devblocks == 0)) { printk("done disk #%d.\n", i/devblocks); rotate = 0; - invalidate_buffers(device); - if (infile.f_op->release) - infile.f_op->release(inode, &infile); + if (blkdev_close(inode, &infile) != 0) { + printk("Error closing the disk.\n"); + goto noclose_input; + } printk("Please insert disk #%d and press ENTER\n", i/devblocks+1); wait_for_keypress(); if (blkdev_open(inode, &infile) != 0) { printk("Error opening disk.\n"); - goto done; + goto noclose_input; } infile.f_pos = 0; printk("Loading disk #%d... ", i/devblocks+1); @@ -683,19 +768,20 @@ kfree(buf); successful_load: - invalidate_buffers(device); ROOT_DEV = MKDEV(MAJOR_NR, unit); if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0"); done: - if (infile.f_op->release) - infile.f_op->release(inode, &infile); - blkdev_put(out_inode->i_bdev, BDEV_FILE); + blkdev_close(inode, &infile); +noclose_input: + blkdev_close(out_inode, &outfile); + iput(inode); + iput(out_inode); set_fs(fs); return; free_inodes: /* free inodes on error */ iput(out_inode); - blkdev_put(inode->i_bdev, BDEV_FILE); + blkdev_close(inode, &infile); free_inode: iput(inode); } diff -urN blkdev-pagecache-ref/fs/block_dev.c blkdev-pagecache/fs/block_dev.c --- blkdev-pagecache-ref/fs/block_dev.c Thu Jul 12 06:52:49 2001 +++ blkdev-pagecache/fs/block_dev.c Thu Jul 12 07:28:23 2001 @@ -2,6 +2,7 @@ * linux/fs/block_dev.c * * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2001 Andrea Arcangeli SuSE */ #include @@ -14,311 +15,299 @@ #include #include #include +#include +#include +#include #include -extern int *blk_size[]; -extern int *blksize_size[]; +static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result) +{ + int err; + + err = -EIO; + if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS)) + goto out; -#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) -#define NBUF 64 + bh_result->b_blocknr = iblock; + bh_result->b_state |= 1UL << BH_Mapped; + err = 0; + + out: + return err; +} -ssize_t block_write(struct file * filp, const char * buf, - size_t count, loff_t *ppos) +static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) { - struct inode * inode = filp->f_dentry->d_inode; - ssize_t blocksize, blocksize_bits, i, buffercount, write_error; - ssize_t block, blocks; - loff_t offset; - ssize_t chars; - ssize_t written, retval; - struct buffer_head * bhlist[NBUF]; - size_t size; - kdev_t dev = inode->i_rdev; - struct buffer_head * bh, *bufferlist[NBUF]; - register char * p; + int i, nr_blocks, retval, dev = inode->i_rdev; + unsigned long * blocks = iobuf->blocks; - if (is_read_only(dev)) - return -EPERM; + if (blocksize != BUFFERED_BLOCKSIZE) + BUG(); - retval = written = write_error = buffercount = 0; - blocksize = BLOCK_SIZE; - if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)]) - blocksize = blksize_size[MAJOR(dev)][MINOR(dev)]; - - i = blocksize; - blocksize_bits = 0; - while(i != 1) { - blocksize_bits++; - i >>= 1; - } - - block = *ppos >> blocksize_bits; - offset = *ppos & (blocksize-1); - - if (blk_size[MAJOR(dev)]) - size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits; - else - size = INT_MAX; - while (count>0) { - if (block >= size) { - retval = -ENOSPC; - goto cleanup; - } - chars = blocksize - offset; - if (chars > count) - chars=count; - -#if 0 - /* get the buffer head */ - { - struct buffer_head * 
(*fn)(kdev_t, int, int) = getblk; - if (chars != blocksize) - fn = bread; - bh = fn(dev, block, blocksize); - if (!bh) { - retval = -EIO; - goto cleanup; - } - if (!buffer_uptodate(bh)) - wait_on_buffer(bh); + nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS; + /* build the blocklist */ + for (i = 0; i < nr_blocks; i++, blocknr++) { + struct buffer_head bh; + + retval = blkdev_get_block(inode, blocknr, &bh); + if (retval) + goto out; + + blocks[i] = bh.b_blocknr; + } + + retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize); + + out: + return retval; +} + +static int blkdev_writepage(struct page * page) +{ + int err, i; + unsigned long block; + struct buffer_head *bh, *head; + struct inode *inode = page->mapping->host; + + if (!PageLocked(page)) + BUG(); + + if (!page->buffers) + create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE); + head = page->buffers; + + block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS); + + bh = head; + i = 0; + + /* Stage 1: make sure we have all the buffers mapped! */ + do { + /* + * If the buffer isn't up-to-date, we can't be sure + * that the buffer has been initialized with the proper + * block number information etc.. + * + * Leave it to the low-level FS to make all those + * decisions (block #0 may actually be a valid block) + */ + if (!buffer_mapped(bh)) { + err = blkdev_get_block(inode, block, bh); + if (err) + goto out; } -#else - bh = getblk(dev, block, blocksize); - if (!bh) { - retval = -EIO; - goto cleanup; - } - - if (!buffer_uptodate(bh)) - { - if (chars == blocksize) - wait_on_buffer(bh); - else - { - bhlist[0] = bh; - if (!filp->f_reada || !read_ahead[MAJOR(dev)]) { - /* We do this to force the read of a single buffer */ - blocks = 1; - } else { - /* Read-ahead before write */ - blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2; - if (block + blocks > size) blocks = size - block; - if (blocks > NBUF) blocks=NBUF; - if (!blocks) blocks = 1; - for(i=1; i= 0) brelse(bhlist[i--]); - retval = -EIO; - goto cleanup; - } - } - } - ll_rw_block(READ, blocks, bhlist); - for(i=1; ib_this_page; block++; - p = offset + bh->b_data; - offset = 0; - *ppos += chars; - written += chars; - count -= chars; - copy_from_user(p,buf,chars); - p += chars; - buf += chars; - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh); - if (filp->f_flags & O_SYNC) - bufferlist[buffercount++] = bh; - else - brelse(bh); - if (buffercount == NBUF){ - ll_rw_block(WRITE, buffercount, bufferlist); - for(i=0; ib_count); + set_bit(BH_Uptodate, &bh->b_state); + clear_bit(BH_Dirty, &bh->b_state); + bh = bh->b_this_page; + } while (bh != head); + + /* Stage 3: submit the IO */ + do { + submit_bh(WRITE, bh); + bh = bh->b_this_page; + } while (bh != head); + + /* Done - end_buffer_io_async will unlock */ + SetPageUptodate(page); + return 0; + +out: + ClearPageUptodate(page); + UnlockPage(page); + return err; +} + +static int blkdev_readpage(struct file * file, struct page * page) +{ + struct inode *inode = page->mapping->host; + kdev_t dev = inode->i_rdev; + unsigned long iblock, lblock; + struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)]; + unsigned int blocks; + int nr, i; + + if (!PageLocked(page)) + PAGE_BUG(page); + if (!page->buffers) + create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE); + head = page->buffers; + + blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS; + iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS); + lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - 
BLOCK_SIZE_BITS); + bh = head; + nr = 0; + i = 0; + + do { + if (buffer_uptodate(bh)) + continue; + + if (!buffer_mapped(bh)) { + if (iblock <= lblock) { + if (blkdev_get_block(inode, iblock, bh)) + continue; + } + if (!buffer_mapped(bh)) { + memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE); + flush_dcache_page(page); + kunmap(page); + set_bit(BH_Uptodate, &bh->b_state); + continue; } - buffercount=0; + /* get_block() might have updated the buffer synchronously */ + if (buffer_uptodate(bh)) + continue; } - balance_dirty(dev); - if (write_error) - break; - } - cleanup: - if ( buffercount ){ - ll_rw_block(WRITE, buffercount, bufferlist); - for(i=0; if_reada = 1; - if(write_error) - return -EIO; - return written ? written : retval; -} - -ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos) -{ - struct inode * inode = filp->f_dentry->d_inode; - size_t block; - loff_t offset; - ssize_t blocksize; - ssize_t blocksize_bits, i; - size_t blocks, rblocks, left; - int bhrequest, uptodate; - struct buffer_head ** bhb, ** bhe; - struct buffer_head * buflist[NBUF]; - struct buffer_head * bhreq[NBUF]; - unsigned int chars; - loff_t size; - kdev_t dev; - ssize_t read; - dev = inode->i_rdev; - blocksize = BLOCK_SIZE; - if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)]) - blocksize = blksize_size[MAJOR(dev)][MINOR(dev)]; - i = blocksize; - blocksize_bits = 0; - while (i != 1) { - blocksize_bits++; - i >>= 1; - } - - offset = *ppos; - if (blk_size[MAJOR(dev)]) - size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS; - else - size = (loff_t) INT_MAX << BLOCK_SIZE_BITS; - - if (offset > size) - left = 0; - /* size - offset might not fit into left, so check explicitly. */ - else if (size - offset > INT_MAX) - left = INT_MAX; - else - left = size - offset; - if (left > count) - left = count; - if (left <= 0) + arr[nr] = bh; + nr++; + } while (i++, iblock++, (bh = bh->b_this_page) != head); + + if (!nr) { + /* + * all buffers are uptodate - we can set the page + * uptodate as well. + */ + SetPageUptodate(page); + UnlockPage(page); return 0; - read = 0; - block = offset >> blocksize_bits; - offset &= blocksize-1; - size >>= blocksize_bits; - rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits; - bhb = bhe = buflist; - if (filp->f_reada) { - if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9)) - blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9); - if (rblocks > blocks) - blocks = rblocks; - - } - if (block + blocks > size) { - blocks = size - block; - if (blocks == 0) - return 0; - } - - /* We do this in a two stage process. We first try to request - as many blocks as we can, then we wait for the first one to - complete, and then we try to wrap up as many as are actually - done. This routine is rather generic, in that it can be used - in a filesystem by substituting the appropriate function in - for getblk. + } + + /* Stage two: lock the buffers */ + for (i = 0; i < nr; i++) { + struct buffer_head * bh = arr[i]; + lock_buffer(bh); + set_buffer_async_io(bh); + atomic_inc(&bh->b_count); + } - This routine is optimized to make maximum use of the various - buffers and caches. 
*/ + /* Stage 3: start the IO */ + for (i = 0; i < nr; i++) + submit_bh(READ, arr[i]); - do { - bhrequest = 0; - uptodate = 1; - while (blocks) { - --blocks; - *bhb = getblk(dev, block++, blocksize); - if (*bhb && !buffer_uptodate(*bhb)) { - uptodate = 0; - bhreq[bhrequest++] = *bhb; - } + return 0; +} - if (++bhb == &buflist[NBUF]) - bhb = buflist; +static int __blkdev_prepare_write(struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + kdev_t dev = inode->i_rdev; + unsigned block_start, block_end; + unsigned long block; + int err = 0; + struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; + kmap(page); + + if (!page->buffers) + create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE); + head = page->buffers; + + block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS); + + for(bh = head, block_start = 0; bh != head || !block_start; + block++, block_start=block_end, bh = bh->b_this_page) { + if (!bh) + BUG(); + block_end = block_start + BUFFERED_BLOCKSIZE; + if (block_end <= from) + continue; + if (block_start >= to) + break; + if (!buffer_mapped(bh)) { + err = blkdev_get_block(inode, block, bh); + if (err) + goto out; + } + if (Page_Uptodate(page)) { + set_bit(BH_Uptodate, &bh->b_state); + continue; + } + if (!buffer_uptodate(bh) && + (block_start < from || block_end > to)) { + ll_rw_block(READ, 1, &bh); + *wait_bh++=bh; + } + } + /* + * If we issued read requests - let them complete. + */ + while(wait_bh > wait) { + wait_on_buffer(*--wait_bh); + err = -EIO; + if (!buffer_uptodate(*wait_bh)) + goto out; + } + return 0; +out: + return err; +} - /* If the block we have on hand is uptodate, go ahead - and complete processing. */ - if (uptodate) - break; - if (bhb == bhe) - break; - } - - /* Now request them all */ - if (bhrequest) { - ll_rw_block(READ, bhrequest, bhreq); - } - - do { /* Finish off all I/O that has actually completed */ - if (*bhe) { - wait_on_buffer(*bhe); - if (!buffer_uptodate(*bhe)) { /* read error? 
*/
-				brelse(*bhe);
-				if (++bhe == &buflist[NBUF])
-					bhe = buflist;
-				left = 0;
-				break;
-			}
-		}
-		if (left < blocksize - offset)
-			chars = left;
-		else
-			chars = blocksize - offset;
-		*ppos += chars;
-		left -= chars;
-		read += chars;
-		if (*bhe) {
-			copy_to_user(buf,offset+(*bhe)->b_data,chars);
-			brelse(*bhe);
-			buf += chars;
-		} else {
-			while (chars-- > 0)
-				put_user(0,buf++);
+static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	int err = __blkdev_prepare_write(inode, page, from, to);
+	if (err) {
+		ClearPageUptodate(page);
+		kunmap(page);
+	}
+	return err;
+}
+
+static int __blkdev_commit_write(struct inode *inode, struct page *page,
+				 unsigned from, unsigned to)
+{
+	unsigned block_start, block_end;
+	int partial = 0, need_balance_dirty = 0;
+	struct buffer_head *bh, *head;
+
+	for(bh = head = page->buffers, block_start = 0;
+	    bh != head || !block_start;
+	    block_start=block_end, bh = bh->b_this_page) {
+		block_end = block_start + BUFFERED_BLOCKSIZE;
+		if (block_end <= from || block_start >= to) {
+			if (!buffer_uptodate(bh))
+				partial = 1;
+		} else {
+			set_bit(BH_Uptodate, &bh->b_state);
+			if (!atomic_set_buffer_dirty(bh)) {
+				__mark_dirty(bh);
+				buffer_insert_inode_data_queue(bh, inode);
+				need_balance_dirty = 1;
 			}
-		offset = 0;
-		if (++bhe == &buflist[NBUF])
-			bhe = buflist;
-	} while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
-	if (bhe == bhb && !blocks)
-		break;
-	} while (left > 0);
+		}
+	}
+
+	if (need_balance_dirty)
+		balance_dirty(bh->b_dev);
+	/*
+	 * If this is a partial write that happened to make all buffers
+	 * uptodate, we can optimize away a bogus readpage() for the
+	 * next read(). Here we 'discover' whether the page went
+	 * uptodate as a result of this (potentially partial) write.
+	 */
+	if (!partial)
+		SetPageUptodate(page);
+	return 0;
+}
 
-/* Release the read-ahead blocks */
-	while (bhe != bhb) {
-		brelse(*bhe);
-		if (++bhe == &buflist[NBUF])
-			bhe = buflist;
-	};
 
-	if (!read)
-		return -EIO;
-	filp->f_reada = 1;
-	return read;
+static int blkdev_commit_write(struct file *file, struct page *page,
+			       unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	__blkdev_commit_write(inode,page,from,to);
+	kunmap(page);
+	return 0;
 }
 
 /*
@@ -354,6 +343,17 @@
 }
 
+static int __block_fsync(struct inode * inode)
+{
+	int ret;
+
+	filemap_fdatasync(inode->i_mapping);
+	ret = sync_buffers(inode->i_rdev, 1);
+	filemap_fdatawait(inode->i_mapping);
+
+	return ret;
+}
+
 /*
  * Filp may be NULL when we are called by an msync of a vma
  * since the vma has no handle.
  */
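With def_blk_aops installed (see the fs/block_dev.c hunks below), a plain write(2) to a block device now runs through the generic pagecache write loop instead of the old block_write()/getblk() path. Roughly, the relevant piece of generic_file_write() in mm/filemap.c looks like this (a paraphrase of the stock 2.4 code with error handling and the short-copy case omitted, not part of this diff):

	page = __grab_cache_page(mapping, index, &cached_page);
	status = mapping->a_ops->prepare_write(file, page, offset, offset + bytes);
	if (!status) {
		/* the page is kmapped by ->prepare_write() */
		copy_from_user(page_address(page) + offset, buf, bytes);
		flush_dcache_page(page);
		/* only marks the buffers dirty, no synchronous I/O here */
		status = mapping->a_ops->commit_write(file, page, offset, offset + bytes);
	}
	UnlockPage(page);
	page_cache_release(page);

So __blkdev_commit_write() just dirties the affected BUFFERED_BLOCKSIZE buffers and leaves the writeback to bdflush or a later blkdev_writepage(), rather than bouncing every write through the buffer-cache LRU the way block_write() did.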
@@ -361,7 +361,9 @@
 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-	return fsync_dev(dentry->d_inode->i_rdev);
+	struct inode * inode = dentry->d_inode;
+
+	return __block_fsync(inode);
 }
 
 /*
@@ -452,6 +454,7 @@
 	atomic_set(&new_bdev->bd_count,1);
 	new_bdev->bd_dev = dev;
 	new_bdev->bd_op = NULL;
+	new_bdev->bd_inode = NULL;
 	spin_lock(&bdev_lock);
 	bdev = bdfind(dev, head);
 	if (!bdev) {
@@ -467,9 +470,11 @@
 void bdput(struct block_device *bdev)
 {
 	if (atomic_dec_and_test(&bdev->bd_count)) {
-		spin_lock(&bdev_lock);
-		if (atomic_read(&bdev->bd_openers))
+		if (bdev->bd_openers)
+			BUG();
+		if (bdev->bd_cache_openers)
 			BUG();
+		spin_lock(&bdev_lock);
 		list_del(&bdev->bd_hash);
 		spin_unlock(&bdev_lock);
 		destroy_bdev(bdev);
@@ -638,9 +643,10 @@
 		ret = 0;
 		if (bdev->bd_op->open)
 			ret = bdev->bd_op->open(fake_inode, &fake_file);
-		if (!ret)
-			atomic_inc(&bdev->bd_openers);
-		else if (!atomic_read(&bdev->bd_openers))
+		if (!ret) {
+			bdev->bd_openers++;
+			atomic_inc(&bdev->bd_count);
+		} else if (!bdev->bd_openers)
 			bdev->bd_op = NULL;
 		iput(fake_inode);
 	}
@@ -653,6 +659,7 @@
 {
 	int ret = -ENXIO;
 	struct block_device *bdev = inode->i_bdev;
+	down(&bdev->bd_sem);
 	lock_kernel();
 	if (!bdev->bd_op)
@@ -661,9 +668,21 @@
 		ret = 0;
 		if (bdev->bd_op->open)
 			ret = bdev->bd_op->open(inode,filp);
-		if (!ret)
-			atomic_inc(&bdev->bd_openers);
-		else if (!atomic_read(&bdev->bd_openers))
+		if (!ret) {
+			bdev->bd_openers++;
+			if (!bdev->bd_cache_openers && bdev->bd_inode)
+				BUG();
+			if (bdev->bd_cache_openers && !bdev->bd_inode)
+				BUG();
+			if (!bdev->bd_cache_openers++)
+				bdev->bd_inode = inode;
+			else {
+				if (bdev->bd_inode != inode && !inode->i_mapping_overload++) {
+					inode->i_mapping = bdev->bd_inode->i_mapping;
+					atomic_inc(&bdev->bd_inode->i_count);
+				}
+			}
+		} else if (!bdev->bd_openers)
 			bdev->bd_op = NULL;
 	}
 	unlock_kernel();
@@ -676,14 +695,12 @@
 	int ret = 0;
 	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
 	down(&bdev->bd_sem);
-	/* syncing will go here */
 	lock_kernel();
 	if (kind == BDEV_FILE || kind == BDEV_FS)
 		fsync_dev(rdev);
-	if (atomic_dec_and_test(&bdev->bd_openers)) {
-		/* invalidating buffers will go here */
+	/* only filesystems use the buffer cache for metadata these days */
+	if (kind == BDEV_FS)
 		invalidate_buffers(rdev);
-	}
 	if (bdev->bd_op->release) {
 		struct inode * fake_inode = get_empty_inode();
 		ret = -ENOMEM;
 		if (fake_inode) {
 			fake_inode->i_rdev = rdev;
 			ret = bdev->bd_op->release(fake_inode, NULL);
 			iput(fake_inode);
-		}
+		} else
+			printk(KERN_WARNING "blkdev_put: ->release couldn't be run due to -ENOMEM\n");
 	}
-	if (!atomic_read(&bdev->bd_openers))
+	if (!--bdev->bd_openers)
 		bdev->bd_op = NULL;	/* we can't rely on driver being */
 					/* kind to stay around. */
 	unlock_kernel();
 	up(&bdev->bd_sem);
+	bdput(bdev);
 	return ret;
 }
 
-static int blkdev_close(struct inode * inode, struct file * filp)
+int blkdev_close(struct inode * inode, struct file * filp)
 {
-	return blkdev_put(inode->i_bdev, BDEV_FILE);
+	struct block_device *bdev = inode->i_bdev;
+	int ret = 0;
+	struct inode * bd_inode = bdev->bd_inode;
+
+	if (bd_inode->i_mapping != inode->i_mapping)
+		BUG();
+	down(&bdev->bd_sem);
+	/* cache coherency protocol */
+	if (!--bdev->bd_cache_openers) {
+		struct super_block * sb;
+
+		/* flush the pagecache to disk */
+		__block_fsync(inode);
+		/* drop the pagecache, uptodate info is on disk by now */
+		truncate_inode_pages(inode->i_mapping, 0);
+		/* forget the bdev pagecache address space */
+		bdev->bd_inode = NULL;
+
+		/*
+		 * Only if an underlying fs is mounted ro will we try to
+		 * refill its pinned buffer cache from disk, which we can
+		 * do race-free since we assume the fs isn't allowed to
+		 * write anything.
+		 */
+		sb = get_super(inode->i_rdev);
+		if (sb && sb->s_flags & MS_RDONLY) {
+			/*
+			 * Invalidate as much as possible, the fs is ro so
+			 * there's nothing dirty to lose in the highlevel caches.
+			 */
+			invalidate_device(inode->i_rdev, 0);
+
+			/* now refill the obsolete pinned buffers from disk */
+			update_buffers(inode->i_rdev);
+		}
+	}
+	if (inode != bd_inode && !--inode->i_mapping_overload) {
+		inode->i_mapping = &inode->i_data;
+		iput(bd_inode);
+	}
+
+	/* release the device driver */
+	if (bdev->bd_op->release)
+		ret = bdev->bd_op->release(inode, NULL);
+	if (!--bdev->bd_openers)
+		bdev->bd_op = NULL;
+	up(&bdev->bd_sem);
+
+	return ret;
 }
 
 static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
@@ -714,12 +781,22 @@
 	return -EINVAL;
 }
 
+struct address_space_operations def_blk_aops = {
+	readpage: blkdev_readpage,
+	writepage: blkdev_writepage,
+	sync_page: block_sync_page,
+	prepare_write: blkdev_prepare_write,
+	commit_write: blkdev_commit_write,
+	direct_IO: blkdev_direct_IO,
+};
+
 struct file_operations def_blk_fops = {
 	open:		blkdev_open,
 	release:	blkdev_close,
 	llseek:		block_llseek,
-	read:		block_read,
-	write:		block_write,
+	read:		generic_file_read,
+	write:		generic_file_write,
+	mmap:		generic_file_mmap,
 	fsync:		block_fsync,
 	ioctl:		blkdev_ioctl,
 };
diff -urN blkdev-pagecache-ref/fs/buffer.c blkdev-pagecache/fs/buffer.c
--- blkdev-pagecache-ref/fs/buffer.c	Wed Jul 11 02:50:33 2001
+++ blkdev-pagecache/fs/buffer.c	Wed Jul 11 02:50:07 2001
@@ -293,7 +293,7 @@
  * We will ultimately want to put these in a separate list, but for
  * now we search all of the lists for dirty buffers.
  */
-static int sync_buffers(kdev_t dev, int wait)
+int sync_buffers(kdev_t dev, int wait)
 {
 	int err = 0;
 
@@ -637,7 +637,6 @@
 	return ret;
 }
 
-
 /* If invalidate_buffers() will trash dirty buffers, it means some kind
    of fs corruption is going on. Trashing dirty data always imply losing
    information that was supposed to be just stored on the physical layer
@@ -657,8 +656,16 @@
 
    These are two special cases. Normal usage imply the device driver
    to issue a sync on the device (without waiting I/O completion) and
-   then an invalidate_buffers call that doesn't trash dirty buffers. */
-void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
+   then an invalidate_buffers call that doesn't trash dirty buffers.
+
+   For handling cache coherency with the blkdev pagecache the 'update' case
+   has been introduced. It is needed to re-read from disk any pinned
+   buffer.
NOTE: re-reading from disk is destructive so we can do it only + when we assume nobody is changing the buffercache under our I/O and when + we think the disk contains more recent information than the buffercache. + The update == 1 pass marks the buffers we need to update, the update == 2 + pass does the actual I/O. */ +void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update) { int i, nlist, slept; struct buffer_head * bh, * bh_next; @@ -689,13 +696,36 @@ } write_lock(&hash_table_lock); - if (!atomic_read(&bh->b_count) && - (destroy_dirty_buffers || !buffer_dirty(bh))) { - remove_inode_queue(bh); - __remove_from_queues(bh); - put_last_free(bh); + /* All buffers in the lru lists are mapped */ + if (!buffer_mapped(bh)) + BUG(); + if (!atomic_read(&bh->b_count)) { + if (destroy_dirty_buffers || !buffer_dirty(bh)) { + remove_inode_queue(bh); + __remove_from_queues(bh); + put_last_free(bh); + } + } else if (update) { + if ((update == 2) ^ buffer_uptodate(bh) && + (update == 2) ^ buffer_req(bh)) { + write_unlock(&hash_table_lock); + atomic_inc(&bh->b_count); + spin_unlock(&lru_list_lock); + + if (update == 2) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + } else { + lock_buffer(bh); + clear_bit(BH_Uptodate, &bh->b_state); + clear_bit(BH_Req, &bh->b_state); + unlock_buffer(bh); + } + + atomic_dec(&bh->b_count); + goto retry; + } } - /* else complain loudly? */ write_unlock(&hash_table_lock); if (slept) @@ -1185,7 +1215,7 @@ wakeup_bdflush(state); } -static __inline__ void __mark_dirty(struct buffer_head *bh) +inline void __mark_dirty(struct buffer_head *bh) { bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer; refile_buffer(bh); @@ -1218,8 +1248,6 @@ dispose = BUF_LOCKED; if (buffer_dirty(bh)) dispose = BUF_DIRTY; - if (buffer_protected(bh)) - dispose = BUF_PROTECTED; if (dispose != bh->b_list) { __remove_from_lru_list(bh, bh->b_list); bh->b_list = dispose; @@ -1259,7 +1287,7 @@ /* grab the lru lock here to block bdflush. */ spin_lock(&lru_list_lock); write_lock(&hash_table_lock); - if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf) || buffer_protected(buf)) + if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf)) goto in_use; __hash_unlink(buf); write_unlock(&hash_table_lock); @@ -1531,7 +1559,7 @@ return 1; } -static void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize) +void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize) { struct buffer_head *bh, *head, *tail; @@ -2487,7 +2515,7 @@ /* * Can the buffer be thrown out? 
*/ -#define BUFFER_BUSY_BITS ((1<b_count) | ((bh)->b_state & BUFFER_BUSY_BITS)) /* @@ -2571,9 +2599,8 @@ #ifdef CONFIG_SMP struct buffer_head * bh; int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; - int protected = 0; int nlist; - static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY", "PROTECTED", }; + static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY", }; #endif printk("Buffer memory: %6dkB\n", @@ -2583,7 +2610,7 @@ if (!spin_trylock(&lru_list_lock)) return; for(nlist = 0; nlist < NR_LIST; nlist++) { - found = locked = dirty = used = lastused = protected = 0; + found = locked = dirty = used = lastused = 0; bh = lru_list[nlist]; if(!bh) continue; @@ -2591,8 +2618,6 @@ found++; if (buffer_locked(bh)) locked++; - if (buffer_protected(bh)) - protected++; if (buffer_dirty(bh)) dirty++; if (atomic_read(&bh->b_count)) @@ -2606,9 +2631,9 @@ buf_types[nlist], found, tmp); } printk("%9s: %d buffers, %lu kbyte, %d used (last=%d), " - "%d locked, %d protected, %d dirty\n", + "%d locked, %d dirty\n", buf_types[nlist], found, size_buffers_type[nlist]>>10, - used, lastused, locked, protected, dirty); + used, lastused, locked, dirty); } spin_unlock(&lru_list_lock); #endif diff -urN blkdev-pagecache-ref/fs/devices.c blkdev-pagecache/fs/devices.c --- blkdev-pagecache-ref/fs/devices.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/fs/devices.c Wed Jul 11 02:50:07 2001 @@ -206,6 +206,7 @@ inode->i_cdev = cdget(rdev); } else if (S_ISBLK(mode)) { inode->i_fop = &def_blk_fops; + inode->i_mapping->a_ops = &def_blk_aops; inode->i_rdev = to_kdev_t(rdev); inode->i_bdev = bdget(rdev); } else if (S_ISFIFO(mode)) diff -urN blkdev-pagecache-ref/fs/inode.c blkdev-pagecache/fs/inode.c --- blkdev-pagecache-ref/fs/inode.c Thu Jul 12 05:42:53 2001 +++ blkdev-pagecache/fs/inode.c Thu Jul 12 05:42:16 2001 @@ -135,6 +135,9 @@ { struct super_block * sb = inode->i_sb; + if (!sb) + return; + /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (sb->s_op && sb->s_op->dirty_inode) diff -urN blkdev-pagecache-ref/fs/super.c blkdev-pagecache/fs/super.c --- blkdev-pagecache-ref/fs/super.c Wed Jul 4 04:03:46 2001 +++ blkdev-pagecache/fs/super.c Thu Jul 12 06:33:57 2001 @@ -1814,7 +1814,11 @@ blivet = do_umount(old_rootmnt, 0); mntput(old_rootmnt); if (!blivet) { - ioctl_by_bdev(ramdisk, BLKFLSBUF, 0); + int ioctl_err; + + ioctl_err = ioctl_by_bdev(ramdisk, BLKFLSBUF, 0); + if (ioctl_err) + printk("failed to release ramdisk %d...", ioctl_err); printk("okay\n"); error = 0; } diff -urN blkdev-pagecache-ref/include/linux/blkdev.h blkdev-pagecache/include/linux/blkdev.h --- blkdev-pagecache-ref/include/linux/blkdev.h Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/include/linux/blkdev.h Wed Jul 11 19:54:55 2001 @@ -215,4 +215,15 @@ #define blk_started_io(nsects) \ atomic_add(nsects, &queued_sectors); +static inline int buffered_blk_size(kdev_t dev) +{ + int ret = INT_MAX; + int major = MAJOR(dev); + + if (blk_size[major]) + ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS); + + return ret; +} + #endif diff -urN blkdev-pagecache-ref/include/linux/fs.h blkdev-pagecache/include/linux/fs.h --- blkdev-pagecache-ref/include/linux/fs.h Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/include/linux/fs.h Wed Jul 11 19:54:54 2001 @@ -46,6 +46,10 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<b_data & ~PAGE_MASK) @@ -407,9 +409,10 @@ struct block_device { struct list_head bd_hash; atomic_t bd_count; -/* 
struct address_space bd_data; */ + struct inode * bd_inode; dev_t bd_dev; /* not a kdev_t - it's a search key */ - atomic_t bd_openers; + int bd_openers; + int bd_cache_openers; const struct block_device_operations *bd_op; struct semaphore bd_sem; /* open/close mutex */ }; @@ -445,7 +448,8 @@ wait_queue_head_t i_wait; struct file_lock *i_flock; struct address_space *i_mapping; - struct address_space i_data; + struct address_space i_data; + int i_mapping_overload; struct dquot *i_dquot[MAXQUOTAS]; /* These three should probably be a union */ struct pipe_inode_info *i_pipe; @@ -1045,7 +1049,9 @@ extern struct char_device *cdget(dev_t); extern void cdput(struct char_device *); extern int blkdev_open(struct inode *, struct file *); +extern int blkdev_close(struct inode *, struct file *); extern struct file_operations def_blk_fops; +extern struct address_space_operations def_blk_aops; extern struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, mode_t, unsigned, int); @@ -1084,8 +1090,7 @@ #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ -#define BUF_PROTECTED 3 /* Ramdisk persistent storage */ -#define NR_LIST 4 +#define NR_LIST 3 /* * This is called by bh->b_end_io() handlers when I/O has completed. @@ -1111,21 +1116,10 @@ __mark_buffer_clean(bh); } -#define atomic_set_buffer_protected(bh) test_and_set_bit(BH_Protected, &(bh)->b_state) - -static inline void __mark_buffer_protected(struct buffer_head *bh) -{ - refile_buffer(bh); -} - -static inline void mark_buffer_protected(struct buffer_head * bh) -{ - if (!atomic_set_buffer_protected(bh)) - __mark_buffer_protected(bh); -} - +extern void FASTCALL(__mark_dirty(struct buffer_head *bh)); extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh)); extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); +extern void FASTCALL(buffer_insert_inode_data_queue(struct buffer_head *, struct inode *)); #define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state) @@ -1166,12 +1160,18 @@ extern void invalidate_inode_pages(struct inode *); extern void invalidate_inode_pages2(struct address_space *); extern void invalidate_inode_buffers(struct inode *); -#define invalidate_buffers(dev) __invalidate_buffers((dev), 0) -#define destroy_buffers(dev) __invalidate_buffers((dev), 1) -extern void __invalidate_buffers(kdev_t dev, int); +#define invalidate_buffers(dev) __invalidate_buffers((dev), 0, 0) +#define destroy_buffers(dev) __invalidate_buffers((dev), 1, 0) +#define update_buffers(dev) \ +do { \ + __invalidate_buffers((dev), 0, 1); \ + __invalidate_buffers((dev), 0, 2); \ +} while (0) +extern void __invalidate_buffers(kdev_t dev, int, int); extern void sync_inodes(kdev_t); extern void sync_unlocked_inodes(void); extern void write_inode_now(struct inode *, int); +extern int sync_buffers(kdev_t, int); extern void sync_dev(kdev_t); extern int fsync_dev(kdev_t); extern int fsync_super(struct super_block *); @@ -1355,6 +1355,7 @@ int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); +extern void create_empty_buffers(struct page *, kdev_t, unsigned long); extern int waitfor_one_page(struct page*); extern int generic_file_mmap(struct file *, 
struct vm_area_struct *); diff -urN blkdev-pagecache-ref/include/linux/swap.h blkdev-pagecache/include/linux/swap.h --- blkdev-pagecache-ref/include/linux/swap.h Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/include/linux/swap.h Wed Jul 11 02:52:02 2001 @@ -278,9 +278,6 @@ #include #endif -#define page_ramdisk(page) \ - (page->buffers && (MAJOR(page->buffers->b_dev) == RAMDISK_MAJOR)) - extern spinlock_t swaplock; #define swap_list_lock() spin_lock(&swaplock) diff -urN blkdev-pagecache-ref/kernel/ksyms.c blkdev-pagecache/kernel/ksyms.c --- blkdev-pagecache-ref/kernel/ksyms.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/kernel/ksyms.c Wed Jul 11 02:50:07 2001 @@ -261,6 +261,7 @@ EXPORT_SYMBOL(__find_lock_page); EXPORT_SYMBOL(grab_cache_page); EXPORT_SYMBOL(read_cache_page); +EXPORT_SYMBOL(__find_get_page); EXPORT_SYMBOL(vfs_readlink); EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(page_readlink); @@ -293,8 +294,6 @@ EXPORT_SYMBOL(tty_std_termios); /* block device driver support */ -EXPORT_SYMBOL(block_read); -EXPORT_SYMBOL(block_write); EXPORT_SYMBOL(blksize_size); EXPORT_SYMBOL(hardsect_size); EXPORT_SYMBOL(blk_size); diff -urN blkdev-pagecache-ref/mm/filemap.c blkdev-pagecache/mm/filemap.c --- blkdev-pagecache-ref/mm/filemap.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/mm/filemap.c Wed Jul 11 03:02:06 2001 @@ -1058,16 +1058,42 @@ return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)]; } +static inline unsigned long calc_end_index(struct inode * inode) +{ + unsigned long end_index; + + if (!S_ISBLK(inode->i_mode)) + end_index = inode->i_size >> PAGE_CACHE_SHIFT; + else + end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS); + + return end_index; +} + +static inline loff_t calc_rsize(struct inode * inode) +{ + loff_t rsize; + + if (!S_ISBLK(inode->i_mode)) + rsize = inode->i_size; + else + rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS; + + return rsize; +} + static void generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode, struct page * page) { - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; + unsigned long end_index; unsigned long index = page->index; unsigned long max_ahead, ahead; unsigned long raend; int max_readahead = get_max_readahead(inode); + end_index = calc_end_index(inode); + raend = filp->f_raend; max_ahead = 0; @@ -1232,13 +1258,17 @@ struct page *page, **hash; unsigned long end_index, nr, ret; - end_index = inode->i_size >> PAGE_CACHE_SHIFT; + end_index = calc_end_index(inode); + if (index > end_index) break; nr = PAGE_CACHE_SIZE; if (index == end_index) { - nr = inode->i_size & ~PAGE_CACHE_MASK; - if (nr <= offset) + if (!S_ISBLK(inode->i_mode)) { + nr = inode->i_size & ~PAGE_CACHE_MASK; + if (nr <= offset) + break; + } else break; } @@ -1410,9 +1440,14 @@ new_iobuf = 1; } - blocksize = inode->i_sb->s_blocksize; + if (!S_ISBLK(inode->i_mode)) { + blocksize = inode->i_sb->s_blocksize; + blocksize_bits = inode->i_sb->s_blocksize_bits; + } else { + blocksize = BUFFERED_BLOCKSIZE; + blocksize_bits = BUFFERED_BLOCKSIZE_BITS; + } blocksize_mask = blocksize - 1; - blocksize_bits = inode->i_sb->s_blocksize_bits; chunk_size = KIO_MAX_ATOMIC_IO << 10; retval = -EINVAL; @@ -1526,11 +1561,13 @@ o_direct: { - loff_t pos = *ppos; + loff_t pos = *ppos, size; struct inode * inode = filp->f_dentry->d_inode; - if (pos + count > inode->i_size) - count = inode->i_size - pos; + size = calc_rsize(inode); + if (pos + count > size) + count = size - pos; + retval = generic_file_direct_IO(READ, 
filp, buf, count, pos); if (retval > 0) *ppos = pos + retval; @@ -1721,6 +1758,7 @@ struct address_space *mapping = inode->i_mapping; struct page *page, **hash, *old_page; unsigned long size, pgoff; + loff_t rsize; pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; @@ -1729,7 +1767,8 @@ * An external ptracer can access pages that normally aren't * accessible.. */ - size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + rsize = calc_rsize(inode); + size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if ((pgoff >= size) && (area->vm_mm == current->mm)) return NULL; @@ -1970,22 +2009,7 @@ return error; } -/* - * Shared mappings need to be able to do the right thing at - * close/unmap/sync. They will also use the private file as - * backing-store for swapping.. - */ -static struct vm_operations_struct file_shared_mmap = { - nopage: filemap_nopage, -}; - -/* - * Private mappings just need to be able to load in the map. - * - * (This is actually used for shared mappings as well, if we - * know they can't ever get write permissions..) - */ -static struct vm_operations_struct file_private_mmap = { +static struct vm_operations_struct generic_file_vm_ops = { nopage: filemap_nopage, }; @@ -1993,21 +2017,18 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) { - struct vm_operations_struct * ops; struct inode *inode = file->f_dentry->d_inode; - ops = &file_private_mmap; if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { if (!inode->i_mapping->a_ops->writepage) return -EINVAL; - ops = &file_shared_mmap; } if (!inode->i_sb || !S_ISREG(inode->i_mode)) return -EACCES; if (!inode->i_mapping->a_ops->readpage) return -ENOEXEC; UPDATE_ATIME(inode); - vma->vm_ops = ops; + vma->vm_ops = &generic_file_vm_ops; return 0; } @@ -2242,13 +2263,14 @@ long error = -EBADF; struct file * file; unsigned long size, rlim_rss; + loff_t rsize; /* Doesn't work if there's no mapped file. */ if (!vma->vm_file) return error; file = vma->vm_file; - size = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + rsize = calc_rsize(file->f_dentry->d_inode); + size = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) @@ -2761,8 +2783,11 @@ written = 0; - if (file->f_flags & O_APPEND) + if (file->f_flags & O_APPEND) { + if (S_ISBLK(inode->i_mode)) + goto out; pos = inode->i_size; + } /* * Check whether we've reached the file size limit. @@ -2804,25 +2829,26 @@ * Linus frestrict idea will clean these up nicely.. 
*/ - if (pos >= inode->i_sb->s_maxbytes) - { - if (count || pos > inode->i_sb->s_maxbytes) { - send_sig(SIGXFSZ, current, 0); - err = -EFBIG; - goto out; + if (!S_ISBLK(inode->i_mode)) { + if (pos >= inode->i_sb->s_maxbytes) + { + if (count || pos > inode->i_sb->s_maxbytes) { + send_sig(SIGXFSZ, current, 0); + err = -EFBIG; + goto out; + } + /* zero-length writes at ->s_maxbytes are OK */ } - /* zero-length writes at ->s_maxbytes are OK */ - } - if (pos + count > inode->i_sb->s_maxbytes) - count = inode->i_sb->s_maxbytes - pos; + if (pos + count > inode->i_sb->s_maxbytes) + count = inode->i_sb->s_maxbytes - pos; + } if (count == 0) { err = 0; goto out; } - status = 0; remove_suid(inode); inode->i_ctime = inode->i_mtime = CURRENT_TIME; mark_inode_dirty_sync(inode); @@ -2922,7 +2948,7 @@ written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos); if (written > 0) { loff_t end = pos + err; - if (end > inode->i_size) { + if (end > inode->i_size && !S_ISBLK(inode->i_mode)) { inode->i_size = end; mark_inode_dirty(inode); } diff -urN blkdev-pagecache-ref/mm/swap.c blkdev-pagecache/mm/swap.c --- blkdev-pagecache-ref/mm/swap.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/mm/swap.c Wed Jul 11 02:50:07 2001 @@ -179,7 +179,7 @@ * Don't touch it if it's not on the active list. * (some pages aren't on any list at all) */ - if (PageActive(page) && page_count(page) <= maxcount && !page_ramdisk(page)) { + if (PageActive(page) && page_count(page) <= maxcount) { del_page_from_active_list(page); add_page_to_inactive_dirty_list(page); } diff -urN blkdev-pagecache-ref/mm/vmscan.c blkdev-pagecache/mm/vmscan.c --- blkdev-pagecache-ref/mm/vmscan.c Wed Jul 11 02:50:33 2001 +++ blkdev-pagecache/mm/vmscan.c Thu Jul 12 07:43:07 2001 @@ -454,8 +454,7 @@ /* Page is or was in use? Move it to the active list. */ if (PageReferenced(page) || page->age > 0 || - (!page->buffers && page_count(page) > 1) || - page_ramdisk(page)) { + (!page->buffers && page_count(page) > 1)) { del_page_from_inactive_dirty_list(page); add_page_to_active_list(page); continue;
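After this change a ramdisk's backing store is simply the pagecache of rd_inode[minor]: rd_blkdev_pagecache_IO() marks those pages dirty and rd_open()'s igrab() pins the inode, so the page_ramdisk() immunity checks removed above from mm/swap.c and mm/vmscan.c are no longer needed.

As a worked example of the size arithmetic behind the new S_ISBLK paths (hypothetical numbers, 4k pages, and ignoring the BUFFERED_BLOCKSIZE rounding term inside buffered_blk_size()): blk_size[] counts 1k units, so a 1440k floppy yields

	rsize     = (loff_t) 1440 << BLOCK_SIZE_BITS;		/* 1474560 bytes */
	end_index = 1440 >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);	/* 360 */

i.e. pages 0-359 hold device data and generic_file_read() breaks out at index 360, without ever consulting inode->i_size, which is not meaningful for block special inodes.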