[PATCH] byteswap target for device-mapper

 linux-2.6.4-rc1-root/drivers/md/Kconfig       |   15
 linux-2.6.4-rc1-root/drivers/md/Makefile      |    1
 linux-2.6.4-rc1-root/drivers/md/dm-byteswap.c |  634 ++++++++++++++++++++++++++
 3 files changed, 650 insertions(+)

diff -puN /dev/null drivers/md/dm-byteswap.c
--- /dev/null	2004-01-17 00:25:55.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/dm-byteswap.c	2004-02-29 22:45:07.520474680 +0100
@@ -0,0 +1,634 @@
+/*
+ * Copyright (C) 2003 Christophe Saout
+ * Copyright (C) 2004 Bartlomiej Zolnierkiewicz
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/highmem.h>
+#include <asm/atomic.h>
+#include <asm/scatterlist.h>
+#include <asm/page.h>
+
+#include "dm.h"
+
+/*
+ * per bio private data
+ */
+struct byteswap_io {
+	struct dm_target *target;
+	struct bio *bio;
+	struct bio *first_clone;
+	struct work_struct work;
+	atomic_t pending;
+	int error;
+};
+
+/*
+ * context holding the current state of a multi-part conversion
+ */
+struct convert_context {
+	struct bio *bio_in;
+	struct bio *bio_out;
+	unsigned int offset_in;
+	unsigned int offset_out;
+	int idx_in;
+	int idx_out;
+	sector_t sector;
+	int write;
+};
+
+struct byteswap_config {
+	struct dm_dev *dev;
+	sector_t start;
+
+	/*
+	 * pool for per bio private data and
+	 * for byteswapping buffer pages
+	 */
+	mempool_t *io_pool;
+	mempool_t *page_pool;
+};
+
+#define MIN_IOS        256
+#define MIN_POOL_PAGES 32
+#define MIN_BIO_PAGES  8
+
+static kmem_cache_t *_byteswap_io_pool;
+
+/*
+ * Mempool alloc and free functions for the page
+ */
+static void *mempool_alloc_page(int gfp_mask, void *data)
+{
+	return alloc_page(gfp_mask);
+}
+
+static void mempool_free_page(void *page, void *data)
+{
+	__free_page(page);
+}
+
+/* stolen from crypto/internal.h */
+enum km_type byteswap_km_types[] = {
+	KM_USER0,
+	KM_USER1,
+	KM_SOFTIRQ0,
+	KM_SOFTIRQ1,
+};
+
+static inline enum km_type byteswap_kmap_type(int out)
+{
+	return byteswap_km_types[(in_softirq() ? 2 : 0) + out];
+}
+
+/*
+ * This function assumes that nbytes == 1 << SECTOR_SHIFT.
+ */
+static int byteswap(struct scatterlist *dst, struct scatterlist *src,
+		    unsigned int nbytes)
+{
+	void *src_p, *dst_p;
+	u16 *s, *d;
+	unsigned int i;
+
+	BUG_ON(nbytes != (1 << SECTOR_SHIFT));
+
+	src_p = kmap_atomic(src->page, byteswap_kmap_type(0)) + src->offset;
+	dst_p = kmap_atomic(dst->page, byteswap_kmap_type(1)) + dst->offset;
+
+	s = src_p;
+	d = dst_p;
+
+	for (i = 0; i < nbytes/2; i++) {
+		*d = *s << 8 | *s >> 8;
+		s++;
+		d++;
+	}
+
+	kunmap_atomic(src_p, byteswap_kmap_type(0));
+	kunmap_atomic(dst_p, byteswap_kmap_type(1));
+	flush_dcache_page(dst->page);
+
+	return 0;
+}
+
+static inline int
+byteswap_convert_scatterlist(struct byteswap_config *bc, struct scatterlist *out,
+			     struct scatterlist *in, unsigned int length,
+			     int write, sector_t sector)
+{
+	return byteswap(out, in, length);
+}
+
+static void
+byteswap_convert_init(struct byteswap_config *bc, struct convert_context *ctx,
+		      struct bio *bio_out, struct bio *bio_in,
+		      sector_t sector, int write)
+{
+	ctx->bio_in = bio_in;
+	ctx->bio_out = bio_out;
+	ctx->offset_in = 0;
+	ctx->offset_out = 0;
+	ctx->idx_in = bio_in ? bio_in->bi_idx : 0;
+	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
+	ctx->sector = sector;	//+ cc->iv_offset;
+	ctx->write = write;
+}
+
+/*
+ * Byteswap data from one bio to another one (can be the same one).
+ */
+static int byteswap_convert(struct byteswap_config *bc,
+			    struct convert_context *ctx)
+{
+	int r = 0;
+
+	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
+	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
+		struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
+		struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
+		struct scatterlist sg_in = {
+			.page = bv_in->bv_page,
+			.offset = bv_in->bv_offset + ctx->offset_in,
+			.length = 1 << SECTOR_SHIFT
+		};
+		struct scatterlist sg_out = {
+			.page = bv_out->bv_page,
+			.offset = bv_out->bv_offset + ctx->offset_out,
+			.length = 1 << SECTOR_SHIFT
+		};
+
+		ctx->offset_in += sg_in.length;
+		if (ctx->offset_in >= bv_in->bv_len) {
+			ctx->offset_in = 0;
+			ctx->idx_in++;
+		}
+
+		ctx->offset_out += sg_out.length;
+		if (ctx->offset_out >= bv_out->bv_len) {
+			ctx->offset_out = 0;
+			ctx->idx_out++;
+		}
+
+		r = byteswap_convert_scatterlist(bc, &sg_out, &sg_in, sg_in.length,
+						 ctx->write, ctx->sector);
+		if (r < 0)
+			break;
+
+		ctx->sector++;
+	}
+
+	return r;
+}
+
+/*
+ * Generate a new unfragmented bio with the given size
+ * This should never violate the device limitations
+ * May return a smaller bio when running out of pages
+ */
+static struct bio *
+byteswap_alloc_buffer(mempool_t *page_pool, unsigned int size,
+		      struct bio *base_bio, int *bio_vec_idx)
+{
+	struct bio *bio;
+	int nr_iovecs = dm_div_up(size, PAGE_SIZE);
+	int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
+	int flags = current->flags;
+	int i;
+
+	/*
+	 * Tell VM to act less aggressively and fail earlier.
+	 * This is not necessary but increases throughput.
+	 * FIXME: Is this really intelligent?
+	 */
+	current->flags &= ~PF_MEMALLOC;
+
+	if (base_bio)
+		bio = bio_clone(base_bio, GFP_NOIO);
+	else
+		bio = bio_alloc(GFP_NOIO, nr_iovecs);
+	if (!bio) {
+		if (flags & PF_MEMALLOC)
+			current->flags |= PF_MEMALLOC;
+		return NULL;
+	}
+
+	/* if the last bio was not complete, continue where that one ended */
+	bio->bi_idx = *bio_vec_idx;
+	bio->bi_vcnt = *bio_vec_idx;
+	bio->bi_size = 0;
+	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+
+	/* bio->bi_idx pages have already been allocated */
+	size -= bio->bi_idx * PAGE_SIZE;
+
+	for(i = bio->bi_idx; i < nr_iovecs; i++) {
+		struct bio_vec *bv = bio_iovec_idx(bio, i);
+
+		bv->bv_page = mempool_alloc(page_pool, gfp_mask);
+		if (!bv->bv_page)
+			break;
+
+		/*
+		 * if additional pages cannot be allocated without waiting,
+		 * return a partially allocated bio, the caller will then try
+		 * to allocate additional bios while submitting this partial bio
+		 */
+		if ((i - bio->bi_idx) == (MIN_BIO_PAGES - 1))
+			gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+
+		bv->bv_offset = 0;
+		if (size > PAGE_SIZE)
+			bv->bv_len = PAGE_SIZE;
+		else
+			bv->bv_len = size;
+
+		bio->bi_size += bv->bv_len;
+		bio->bi_vcnt++;
+		size -= bv->bv_len;
+	}
+
+	if (flags & PF_MEMALLOC)
+		current->flags |= PF_MEMALLOC;
+
+	if (!bio->bi_size) {
+		bio_put(bio);
+		return NULL;
+	}
+
+	/*
+	 * Remember the last bio_vec allocated to be able
+	 * to correctly continue after the splitting.
+	 */
+	*bio_vec_idx = bio->bi_vcnt;
+
+	return bio;
+}
+
+static void byteswap_free_buffer_pages(mempool_t *page_pool,
+				       struct bio *bio, unsigned int bytes)
+{
+	unsigned int start, end;
+	struct bio_vec *bv;
+	int i;
+
+	/*
+	 * This is ugly, but Jens Axboe thinks that using bi_idx in the
+	 * endio function is too dangerous at the moment, so I calculate the
+	 * correct position using bi_vcnt and bi_size.
+	 * The bv_offset and bv_len fields might already be modified but we
+	 * know that we always allocated whole pages.
+	 * A fix to the bi_idx issue in the kernel is in the works, so
+	 * we will hopefully be able to revert to the cleaner solution soon.
+	 */
+	i = bio->bi_vcnt - 1;
+	bv = bio_iovec_idx(bio, i);
+	end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - bio->bi_size;
+	start = end - bytes;
+
+	start >>= PAGE_SHIFT;
+	if (!bio->bi_size)
+		end = bio->bi_vcnt;
+	else
+		end >>= PAGE_SHIFT;
+
+	for(i = start; i < end; i++) {
+		bv = bio_iovec_idx(bio, i);
+		BUG_ON(!bv->bv_page);
+		mempool_free(bv->bv_page, page_pool);
+		bv->bv_page = NULL;
+	}
+}
+
+/*
+ * One of the bios was finished. Check for completion of
+ * the whole request and correctly clean up the buffer.
+ */
+static void dec_pending(struct byteswap_io *io, int error)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+
+	if (error < 0)
+		io->error = error;
+
+	if (!atomic_dec_and_test(&io->pending))
+		return;
+
+	if (io->first_clone)
+		bio_put(io->first_clone);
+
+	bio_endio(io->bio, io->bio->bi_size, io->error);
+
+	mempool_free(io, bc->io_pool);
+}
+
+/*
+ * kbyteswapd:
+ *
+ * Needed because it would be very unwise to do byteswapping in an
+ * interrupt context, so bios returning from read requests get
+ * queued here.
+ */
+static struct workqueue_struct *_kbyteswapd_workqueue;
+
+static void kbyteswapd_do_work(void *data)
+{
+	struct byteswap_io *io = (struct byteswap_io *)data;
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+	struct convert_context ctx;
+	int r;
+
+	byteswap_convert_init(bc, &ctx, io->bio, io->bio,
+			      io->bio->bi_sector - io->target->begin, 0);
+	r = byteswap_convert(bc, &ctx);
+
+	dec_pending(io, r);
+}
+
+static void kbyteswapd_queue_io(struct byteswap_io *io)
+{
+	INIT_WORK(&io->work, kbyteswapd_do_work, io);
+	queue_work(_kbyteswapd_workqueue, &io->work);
+}
+
+/*
+ * Construct a byteswap mapping: <dev_path> <start>
+ */
+static int byteswap_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct byteswap_config *bc;
+
+	if (argc != 2) {
+		ti->error = "dm-byteswap: Not enough arguments";
+		return -EINVAL;
+	}
+
+	bc = kmalloc(sizeof(*bc), GFP_KERNEL);
+	if (!bc) {
+		ti->error = "dm-byteswap: Cannot allocate byteswap context";
+		return -ENOMEM;
+	}
+
+	bc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+				     mempool_free_slab, _byteswap_io_pool);
+	if (!bc->io_pool) {
+		ti->error = "dm-byteswap: Cannot allocate byteswap io mempool";
+		goto bad1;
+	}
+
+	bc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_page,
+				       mempool_free_page, NULL);
+	if (!bc->page_pool) {
+		ti->error = "dm-byteswap: Cannot allocate page mempool";
+		goto bad2;
+	}
+
+	if (sscanf(argv[1], SECTOR_FORMAT, &bc->start) != 1) {
+		ti->error = "dm-byteswap: Invalid device sector";
+		goto bad3;
+	}
+
+	if (dm_get_device(ti, argv[0], bc->start, ti->len,
+			  dm_table_get_mode(ti->table), &bc->dev)) {
+		ti->error = "dm-byteswap: Device lookup failed";
+		goto bad3;
+	}
+
+	ti->private = bc;
+	return 0;
+
+bad3:
+	mempool_destroy(bc->page_pool);
+bad2:
+	mempool_destroy(bc->io_pool);
+bad1:
+	return -EINVAL;
+}
+
+static void byteswap_dtr(struct dm_target *ti)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+
+	mempool_destroy(bc->page_pool);
+	mempool_destroy(bc->io_pool);
+
+	dm_put_device(ti, bc->dev);
+	kfree(bc);
+}
+
+static int byteswap_endio(struct bio *bio, unsigned int done, int error)
+{
+	struct byteswap_io *io = (struct byteswap_io *)bio->bi_private;
+	struct byteswap_config *bc = (struct byteswap_config *)io->target->private;
+
+	if (bio_data_dir(bio) == WRITE) {
+		/*
+		 * free the processed pages, even if
+		 * it's only a partially completed write
+		 */
+		byteswap_free_buffer_pages(bc->page_pool, bio, done);
+	}
+
+	if (bio->bi_size)
+		return 1;
+
+	bio_put(bio);
+
+	/*
+	 * successful reads are byteswapped by the worker thread
+	 */
+	if ((bio_data_dir(bio) == READ)
+	    && bio_flagged(bio, BIO_UPTODATE)) {
+		kbyteswapd_queue_io(io);
+		return 0;
+	}
+
+	dec_pending(io, error);
+	return error;
+}
+
+static inline struct bio *
+byteswap_clone(struct byteswap_config *bc, struct byteswap_io *io,
+	       struct bio *bio, sector_t sector, int *bvec_idx,
+	       struct convert_context *ctx)
+{
+	struct bio *clone;
+
+	if (bio_data_dir(bio) == WRITE) {
+		clone = byteswap_alloc_buffer(bc->page_pool, bio->bi_size,
+					      io->first_clone, bvec_idx);
+		if (clone) {
+			ctx->bio_out = clone;
+			if (byteswap_convert(bc, ctx) < 0) {
+				byteswap_free_buffer_pages(bc->page_pool, clone,
+							   clone->bi_size);
+				bio_put(clone);
+				return NULL;
+			}
+		}
+	} else
+		clone = bio_clone(bio, GFP_NOIO);
+
+	if (!clone)
+		return NULL;
+
+	clone->bi_private = io;
+	clone->bi_end_io = byteswap_endio;
+	clone->bi_bdev = bc->dev->bdev;
+	clone->bi_sector = bc->start + sector;
+	clone->bi_rw = bio->bi_rw;
+
+	return clone;
+}
+
+static int byteswap_map(struct dm_target *ti, struct bio *bio)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+	struct byteswap_io *io = mempool_alloc(bc->io_pool, GFP_NOIO);
+	struct convert_context ctx;
+	struct bio *clone;
+	unsigned int remaining = bio->bi_size;
+	sector_t sector = bio->bi_sector - ti->begin;
+	int bvec_idx = 0;
+
+	io->target = ti;
+	io->bio = bio;
+	io->first_clone = NULL;
+	io->error = 0;
+	atomic_set(&io->pending, 1); /* hold a reference */
+
+	if (bio_data_dir(bio) == WRITE)
+		byteswap_convert_init(bc, &ctx, NULL, bio, sector, 1);
+
+	/*
+	 * The allocated buffers can be smaller than the whole bio,
+	 * so repeat the whole process until all the data can be handled.
+	 */
+	while (remaining) {
+		clone = byteswap_clone(bc, io, bio, sector, &bvec_idx, &ctx);
+		if (!clone)
+			goto cleanup;
+
+		if (!io->first_clone) {
+			/*
+			 * hold a reference to the first clone, because it
+			 * holds the bio_vec array and that can't be freed
+			 * before all other clones are released
+			 */
+			bio_get(clone);
+			io->first_clone = clone;
+		}
+		atomic_inc(&io->pending);
+
+		remaining -= clone->bi_size;
+		sector += bio_sectors(clone);
+
+		generic_make_request(clone);
+
+		/* out of memory -> run queues */
+		if (remaining)
+			blk_run_queues();
+	}
+
+	/* drop reference, clones could have returned before we reach this */
+	dec_pending(io, 0);
+	return 0;
+
+cleanup:
+	if (io->first_clone) {
+		dec_pending(io, -ENOMEM);
+		return 0;
+	}
+
+	/* if no bio has been dispatched yet, we can directly return the error */
+	mempool_free(io, bc->io_pool);
+	return -ENOMEM;
+}
+
+static int byteswap_status(struct dm_target *ti, status_type_t type,
+			   char *result, unsigned int maxlen)
+{
+	struct byteswap_config *bc = (struct byteswap_config *)ti->private;
+	char buffer[32];
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';
+		break;
+
+	case STATUSTYPE_TABLE:
+		format_dev_t(buffer, bc->dev->bdev->bd_dev);
+		snprintf(result, maxlen, "%s " SECTOR_FORMAT, buffer, bc->start);
+		break;
+	}
+	return 0;
+}
+
+static struct target_type byteswap_target = {
+	.name   = "byteswap",
+	.module = THIS_MODULE,
+	.ctr    = byteswap_ctr,
+	.dtr    = byteswap_dtr,
+	.map    = byteswap_map,
+	.status = byteswap_status,
+};
+
+static int __init dm_byteswap_init(void)
+{
+	int r;
+
+	_byteswap_io_pool = kmem_cache_create("dm-byteswap_io",
+					      sizeof(struct byteswap_io),
+					      0, 0, NULL, NULL);
+	if (!_byteswap_io_pool)
+		return -ENOMEM;
+
+	_kbyteswapd_workqueue = create_workqueue("kbyteswapd");
+	if (!_kbyteswapd_workqueue) {
+		r = -ENOMEM;
+		DMERR("couldn't create kbyteswapd");
+		goto bad1;
+	}
+
+	r = dm_register_target(&byteswap_target);
+	if (r < 0) {
+		DMERR("byteswap: register failed %d", r);
+		goto bad2;
+	}
+
+	return 0;
+
+bad2:
+	destroy_workqueue(_kbyteswapd_workqueue);
+bad1:
+	kmem_cache_destroy(_byteswap_io_pool);
+	return r;
+}
+
+static void __exit dm_byteswap_exit(void)
+{
+	int r = dm_unregister_target(&byteswap_target);
+
+	if (r < 0)
+		DMERR("byteswap: unregister failed %d", r);
+
+	destroy_workqueue(_kbyteswapd_workqueue);
+	kmem_cache_destroy(_byteswap_io_pool);
+}
+
+module_init(dm_byteswap_init);
+module_exit(dm_byteswap_exit);
+
+MODULE_AUTHOR("Christophe Saout, Bartlomiej Zolnierkiewicz");
+MODULE_DESCRIPTION(DM_NAME " target for transparent byteswapping");
+MODULE_LICENSE("GPL");

diff -puN drivers/md/Kconfig~dm-byteswap drivers/md/Kconfig
--- linux-2.6.4-rc1/drivers/md/Kconfig~dm-byteswap	2004-02-29 16:42:42.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/Kconfig	2004-02-29 23:00:32.295887448 +0100
@@ -188,5 +188,20 @@ config DM_CRYPT
 
 	  If unsure, say N.
 
+config DM_BYTESWAP
+	tristate "Byteswap target support"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	help
+	  This device-mapper target allows you to create a device that
+	  transparently byteswaps the data on it.  It is useful for
+	  accessing non-native IDE disks on machines with byteswapped IDE bus
+	  (Atari/Q40/Q60/TiVo) and for accessing IDE disks with byteswapped
+	  content on machines with normal IDE bus.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called dm-byteswap.
+
+	  If unsure, say N.
+
 endmenu
 
diff -puN drivers/md/Makefile~dm-byteswap drivers/md/Makefile
--- linux-2.6.4-rc1/drivers/md/Makefile~dm-byteswap	2004-02-29 16:42:45.000000000 +0100
+++ linux-2.6.4-rc1-root/drivers/md/Makefile	2004-02-29 16:48:35.000000000 +0100
@@ -24,6 +24,7 @@ obj-$(CONFIG_MD_MULTIPATH) += multipath.
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
+obj-$(CONFIG_DM_BYTESWAP)	+= dm-byteswap.o
 
 quiet_cmd_unroll = UNROLL $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
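
For reference, a minimal usage sketch, assuming the device-mapper userspace
tools (dmsetup, blockdev) are available; the backing device /dev/hdb and the
mapping name "bswap" below are only example placeholders:

  # table line format: <logical_start> <num_sectors> byteswap <dev_path> <start_sector>
  echo "0 $(blockdev --getsize /dev/hdb) byteswap /dev/hdb 0" | dmsetup create bswap

  # the byteswapped view of the disk is then a regular block device
  fdisk -l /dev/mapper/bswap

The two target arguments are the ones parsed by byteswap_ctr(): the underlying
device path and the starting sector on that device.  Every 16-bit word read
from or written to /dev/mapper/bswap passes through byteswap() on its way to
the backing device.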