diff options
author | jdike <jdike> | 2003-08-29 16:43:56 +0000 |
---|---|---|
committer | jdike <jdike> | 2003-08-29 16:43:56 +0000 |
commit | b04ac96add7fafe4c9b7b86640365eedb6094ca6 (patch) | |
tree | bfddefd0c1d4b1c1f3ec1fe056046c3381677abf | |
parent | 12f53e2fac2ea2a9460fadde83c43364d9e5f171 (diff) | |
download | uml-history-b04ac96add7fafe4c9b7b86640365eedb6094ca6.tar.gz |
Added mmap support.
-rw-r--r-- | arch/um/drivers/ubd_kern.c | 365 | ||||
-rw-r--r-- | arch/um/include/ubd_user.h | 8 |
2 files changed, 332 insertions, 41 deletions
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 0e65792..bbbc392 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -37,9 +37,12 @@ #include "mconsole_kern.h" #include "init.h" #include "irq_user.h" +#include "irq_kern.h" #include "ubd_user.h" #include "2_5compat.h" #include "os.h" +#include "mem.h" +#include "mem_kern.h" static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); @@ -51,6 +54,10 @@ static int ubd_revalidate1(kdev_t rdev); #define MAX_DEV (8) #define MAX_MINOR (MAX_DEV << UBD_SHIFT) +/* Changed in early boot */ +static int ubd_do_mmap = 0; +#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE + /* Not modified by this driver */ static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; @@ -134,6 +141,12 @@ struct ubd { devfs_handle_t devfs; int no_cow; struct cow cow; + + int map_writes; + int map_reads; + int nomap_writes; + int nomap_reads; + int write_maps; }; #define DEFAULT_COW { \ @@ -154,6 +167,11 @@ struct ubd { .devfs = NULL, \ .no_cow = 0, \ .cow = DEFAULT_COW, \ + .map_writes = 0, \ + .map_reads = 0, \ + .nomap_writes = 0, \ + .nomap_reads = 0, \ + .write_maps = 0, \ } struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; @@ -271,6 +289,13 @@ static int ubd_setup_common(char *str, int *index_out) int major; str++; + if(!strcmp(str, "mmap")){ + CHOOSE_MODE(printk("mmap not supported by the ubd " + "driver in tt mode\n"), + ubd_do_mmap = 1); + return(0); + } + if(!strcmp(str, "sync")){ global_openflags.s = 1; return(0); @@ -426,7 +451,7 @@ static void ubd_finish(int error) static void ubd_handler(void) { struct io_thread_req req; - int n; + int n, err; DEVICE_INTR = NULL; intr_count++; @@ -440,9 +465,18 @@ static void ubd_handler(void) return; } - if((req.offset != ((__u64) (CURRENT->sector)) << 9) || - (req.length != (CURRENT->current_nr_sectors) << 9)) + if((req.op != UBD_MMAP) && + ((req.offset != ((__u64) (CURRENT->sector)) << 9) || + (req.length != (CURRENT->current_nr_sectors) << 9))) panic("I/O op mismatch"); + + if(req.map_fd != -1){ + err = physmem_subst_mapping(req.buffer, req.map_fd, + req.map_offset, 1); + if(err) + printk("ubd_handler - physmem_subst_mapping failed, " + "err = %d\n", err); + } spin_lock(&io_request_lock); ubd_finish(req.error); @@ -483,6 +517,9 @@ static int ubd_add(int n) if(err) goto out; + if(dev->cow.file == NULL) + blk_sizes[n] = UBD_MMAP_BLOCK_SIZE; + sprintf(name, "%d", n); dev->devfs = devfs_register(ubd_dir_handle, name, DEVFS_FL_REMOVABLE, MAJOR_NR, n << UBD_SHIFT, S_IFBLK | @@ -717,7 +754,7 @@ static int ubd_open_dev(struct ubd *dev) if((dev->fd == -ENOENT) && create_cow){ dev->fd = create_cow_file(dev->file, dev->cow.file, - dev->openflags, 1 << 9, + dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, &dev->cow.data_offset); @@ -817,55 +854,147 @@ static int ubd_release(struct inode * inode, struct file * file) return(0); } -void cowify_req(struct io_thread_req *req, struct ubd *dev) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words) +{ + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset) { - int i, update_bitmap, sector = req->offset >> 9; + __u64 sector = req->offset >> 9; + int i; if(req->length > (sizeof(req->sector_mask) * 8) << 9) panic("Operation too long"); + if(req->op == UBD_READ) { for(i = 0; i < req->length >> 9; i++){ - if(ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)){ ubd_set_bit(i, (unsigned char *) &req->sector_mask); } } } - else { - update_bitmap = 0; - for(i = 0; i < req->length >> 9; i++){ - ubd_set_bit(i, (unsigned char *) - &req->sector_mask); - if(!ubd_test_bit(sector + i, (unsigned char *) - dev->cow.bitmap)) - update_bitmap = 1; - ubd_set_bit(sector + i, (unsigned char *) - dev->cow.bitmap); - } - if(update_bitmap){ - req->cow_offset = sector / (sizeof(unsigned long) * 8); - req->bitmap_words[0] = - dev->cow.bitmap[req->cow_offset]; - req->bitmap_words[1] = - dev->cow.bitmap[req->cow_offset + 1]; - req->cow_offset *= sizeof(unsigned long); - req->cow_offset += dev->cow.bitmap_offset; + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words); +} + +static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) +{ + __u64 sector; + unsigned char *bitmap; + int bit, i; + + /* mmap must have been requested on the command line */ + if(!ubd_do_mmap) + return(-1); + + /* The buffer must be page aligned */ + if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + /* The request must be a page long */ + if((req->current_nr_sectors << 9) != PAGE_SIZE) + return(-1); + + if(dev->cow.file == NULL) + return(dev->fd); + + sector = offset >> 9; + bitmap = (unsigned char *) dev->cow.bitmap; + bit = ubd_test_bit(sector, bitmap); + + for(i = 1; i < req->current_nr_sectors; i++){ + if(ubd_test_bit(sector + i, bitmap) != bit) + return(-1); + } + + if(bit) + offset += dev->cow.data_offset; + + /* The data on disk must be page aligned */ + if((offset % UBD_MMAP_BLOCK_SIZE) != 0) + return(-1); + + return(bit ? dev->fd : dev->cow.fd); +} + +static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, + struct request *req, + struct io_thread_req *io_req) +{ + int err; + + if(req->cmd == WRITE){ + /* Writes are almost no-ops since the new data is already in the + * host page cache + */ + dev->map_writes++; + if(dev->cow.file != NULL) + cowify_bitmap(io_req->offset, io_req->length, + &io_req->sector_mask, &io_req->cow_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + io_req->bitmap_words); + } + else { + int w; + + if((dev->cow.file != NULL) && (fd == dev->cow.fd)) + w = 0; + else w = dev->openflags.w; + + if((dev->cow.file != NULL) && (fd == dev->fd)) + offset += dev->cow.data_offset; + + err = physmem_subst_mapping(req->buffer, fd, offset, w); + if(err){ + printk("physmem_subst_mapping failed, err = %d\n", err); + return(1); } + dev->map_reads++; } + io_req->op = UBD_MMAP; + io_req->buffer = req->buffer; + return(0); } static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct ubd *dev; - __u64 block; - int nsect, minor, n; + __u64 offset; + int minor, n, len, fd; if(req->rq_status == RQ_INACTIVE) return(1); minor = MINOR(req->rq_dev); n = minor >> UBD_SHIFT; dev = &ubd_dev[n]; + if(IS_WRITE(req) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %d\n", n); end_request(0); @@ -873,23 +1002,47 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) } req->sector += ubd_part[minor].start_sect; - block = req->sector; - nsect = req->current_nr_sectors; + offset = req->sector << 9; + len = req->current_nr_sectors << 9; - io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->map_fd = -1; + io_req->cow_offset = -1; + io_req->offset = offset; + io_req->length = len; + io_req->error = 0; + io_req->sector_mask = 0; + + fd = mmap_fd(req, dev, io_req->offset); + if(fd > 0){ + /* If mmapping is otherwise OK, but the first access to the + * page is a write, then it's not mapped in yet. So we have + * to write the data to disk first, then we can map the disk + * page in and continue normally from there. + */ + if((req->cmd == WRITE) && !is_remapped(req->buffer)){ + io_req->map_fd = dev->fd; + io_req->map_offset = io_req->offset + + dev->cow.data_offset; + dev->write_maps++; + } + else return(prepare_mmap_request(dev, fd, io_req->offset, req, + io_req)); + } + + if(req->cmd == READ) + dev->nomap_reads++; + else dev->nomap_writes++; + + io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->offset = ((__u64) block) << 9; - io_req->length = nsect << 9; io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->sector_mask = 0; - io_req->cow_offset = -1; - io_req->error = 0; - if(dev->cow.file != NULL) cowify_req(io_req, dev); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset); return(0); } @@ -1072,6 +1225,142 @@ static int ubd_revalidate(kdev_t rdev) return(err); } +static int ubd_check_remapped(int fd, unsigned long address, int is_write, + __u64 offset) +{ + __u64 bitmap_offset; + unsigned long new_bitmap[2]; + int i, err, n; + + /* If it's not a write access, we can't do anything about it */ + if(!is_write) + return(0); + + /* We have a write */ + for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ + struct ubd *dev = &ubd_dev[i]; + + if((dev->fd != fd) && (dev->cow.fd != fd)) + continue; + + /* It's a write to a ubd device */ + + if(!dev->openflags.w){ + /* It's a write access on a read-only device - probably + * shouldn't happen. If the kernel is trying to change + * something with no intention of writing it back out, + * then this message will clue us in that this needs + * fixing + */ + printk("Write access to mapped page from readonly ubd " + "device %d\n", i); + return(0); + } + + /* It's a write to a writeable ubd device - it must be COWed + * because, otherwise, the page would have been mapped in + * writeable + */ + + if(!dev->cow.file) + panic("Write fault on writeable non-COW ubd device %d", + i); + + /* It should also be an access to the backing file since the + * COW pages should be mapped in read-write + */ + + if(fd == dev->fd) + panic("Write fault on a backing page of ubd " + "device %d\n", i); + + /* So, we do the write, copying the backing data to the COW + * file... + */ + + err = os_seek_file(dev->fd, offset + dev->cow.data_offset); + if(err) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, errno = %d", + offset + dev->cow.data_offset, i, -err); + + err = os_write_file(dev->fd, (void *) address, PAGE_SIZE); + if(err != PAGE_SIZE) + panic("Couldn't copy data to COW file of ubd " + "device %d, errno = %d", i, -err); + + /* ... updating the COW bitmap... */ + + cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + new_bitmap); + + err = os_seek_file(dev->fd, bitmap_offset); + if(err) + panic("Couldn't seek to %lld in COW file of ubd " + "device %d, errno = %d", bitmap_offset, i, -err); + + err = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); + if(err != sizeof(new_bitmap)) + panic("Couldn't update bitmap of ubd device %d, " + "errno = %d", i, -err); + + /* Maybe we can map the COW page in, and maybe we can't. If + * it is a pre-V3 COW file, we can't, since the alignment will + * be wrong. If it is a V3 or later COW file which has been + * moved to a system with a larger page size, then maybe we + * can't, depending on the exact location of the page. + */ + + offset += dev->cow.data_offset; + + /* Remove the remapping, putting the original anonymous page + * back. If the COW file can be mapped in, that is done. + * Otherwise, the COW page is read in. + */ + + if(!physmem_remove_mapping((void *) address)) + panic("Address 0x%lx not remapped by ubd device %d", + address, i); + if((offset % UBD_MMAP_BLOCK_SIZE) == 0) + physmem_subst_mapping((void *) address, dev->fd, + offset, 1); + else { + err = os_seek_file(dev->fd, offset); + if(err) + panic("Couldn't seek to %lld in COW file of " + "ubd device %d, errno = %d", offset, i, + -err); + + n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); + if(n != PAGE_SIZE) + panic("Failed to read page from offset %llx of " + "COW file of ubd device %d, errno = %d", + offset, i, -err); + } + + return(1); + } + + /* It's not a write on a ubd device */ + return(0); +} + +static struct remapper ubd_remapper = { + .list = LIST_HEAD_INIT(ubd_remapper.list), + .proc = ubd_check_remapped, +}; + +static int ubd_remapper_setup(void) +{ + if(ubd_do_mmap) + register_remapper(&ubd_remapper); + + return(0); +} + +__initcall(ubd_remapper_setup); + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/include/ubd_user.h b/arch/um/include/ubd_user.h index 6e63af7..bddccc0 100644 --- a/arch/um/include/ubd_user.h +++ b/arch/um/include/ubd_user.h @@ -9,7 +9,7 @@ #include "os.h" -enum ubd_req { UBD_READ, UBD_WRITE }; +enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; struct io_thread_req { enum ubd_req op; @@ -20,8 +20,10 @@ struct io_thread_req { char *buffer; int sectorsize; unsigned long sector_mask; - unsigned long cow_offset; + unsigned long long cow_offset; unsigned long bitmap_words[2]; + int map_fd; + unsigned long long map_offset; int error; }; @@ -31,7 +33,7 @@ extern int open_ubd_file(char *file, struct openflags *openflags, int *create_cow_out); extern int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, int sectorsize, - int *bitmap_offset_out, + int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); |