Binary files 2.4.0-test13-pre3/ID and 2.4.0-test13-pre3-lvm/ID differ diff -urN 2.4.0-test13-pre3/drivers/md/lvm-snap.c 2.4.0-test13-pre3-lvm/drivers/md/lvm-snap.c --- 2.4.0-test13-pre3/drivers/md/lvm-snap.c Thu Nov 16 15:37:28 2000 +++ 2.4.0-test13-pre3-lvm/drivers/md/lvm-snap.c Mon Dec 18 19:48:33 2000 @@ -2,13 +2,14 @@ * kernel/lvm-snap.c * * Copyright (C) 2000 Andrea Arcangeli SuSE + * Heinz Mauelshagen, Sistina Software (persistent snapshots) * * LVM snapshot driver is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * - * LVM driver is distributed in the hope that it will be useful, + * LVM snapshot driver is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
@@ -29,13 +30,31 @@ #include -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.8final (15/02/2000)\n"; +static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9 snapshot code (13/11/2000)\n"; + +#ifndef LockPage +#define LockPage(map) set_bit(PG_locked, &(map)->flags) +#endif extern const char *const lvm_name; extern int lvm_blocksizes[]; void lvm_snapshot_release(lv_t *); +uint lvm_pv_get_number(vg_t * vg, kdev_t rdev) +{ + uint p; + + for ( p = 0; p < vg->pv_max; p++) + { + if ( vg->pv[p] == NULL) continue; + if ( vg->pv[p]->pv_dev == rdev) break; + } + + return vg->pv[p]->pv_number; +} + + #define hashfn(dev,block,mask,chunk_size) \ ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) @@ -72,9 +91,9 @@ return ret; } -static inline void lvm_hash_link(lv_block_exception_t * exception, - kdev_t org_dev, unsigned long org_start, - lv_t * lv) +inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) { struct list_head * hash_table = lv->lv_snapshot_hash_table; unsigned long mask = lv->lv_snapshot_hash_mask; @@ -97,7 +116,6 @@ pe_adjustment = (*org_sector-pe_off) % chunk_size; __org_start = *org_sector - pe_adjustment; __org_dev = *org_dev; - ret = 0; exception = lvm_find_exception_table(__org_dev, __org_start, lv); if (exception) @@ -109,7 +127,7 @@ return ret; } -static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) { kdev_t last_dev; int i; @@ -118,8 +136,7 @@ or error on this snapshot --> release it */ invalidate_buffers(lv_snap->lv_dev); - last_dev = 0; - for (i = 0; i < lv_snap->lv_remap_ptr; i++) { + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { last_dev = lv_snap->lv_block_exception[i].rdev_new; invalidate_buffers(last_dev); @@ -149,7 +166,7 @@ blocks[i] = start++; } -static inline int get_blksize(kdev_t dev) +inline int 
lvm_get_blksize(kdev_t dev) { int correct_size = BLOCK_SIZE, i, major; @@ -185,6 +202,133 @@ } #endif + +void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) +{ + int id = 0, is = lv_snap->lv_remap_ptr; + ulong blksize_snap; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + + if (is == 0) return; + is--; + blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); + is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t)); + + memset(lv_COW_table, 0, blksize_snap); + for ( ; is < lv_snap->lv_remap_ptr; is++, id++) { + /* store new COW_table entry */ + lv_COW_table[id].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org)); + lv_COW_table[id].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_org); + lv_COW_table[id].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new)); + lv_COW_table[id].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_new); + } +} + + +/* + * writes a COW exception table sector to disk (HM) + * + */ + +int lvm_write_COW_table_block(vg_t * vg, + lv_t * lv_snap) +{ + int blksize_snap; + int end_of_table; + int idx = lv_snap->lv_remap_ptr, idx_COW_table; + int nr_pages_tmp; + int length_tmp; + ulong snap_pe_start, COW_table_sector_offset, + COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; + ulong blocks[1]; + const char * reason; + kdev_t snap_phys_dev; + struct kiobuf * iobuf = lv_snap->lv_iobuf; + struct page * page_tmp; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + + idx--; + + COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); + COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); + + /* get physical addresse of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = 
lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + + blksize_snap = lvm_get_blksize(snap_phys_dev); + + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; + + if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); + + /* sector offset into the on disk COW table */ + COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); + + /* COW table block to write next */ + blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); + + /* store new COW_table entry */ + lv_COW_table[idx_COW_table].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org)); + lv_COW_table[idx_COW_table].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_org); + lv_COW_table[idx_COW_table].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, snap_phys_dev)); + lv_COW_table[idx_COW_table].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_new); + + length_tmp = iobuf->length; + iobuf->length = blksize_snap; + page_tmp = iobuf->maplist[0]; + iobuf->maplist[0] = lv_snap->lv_COW_table_page; + nr_pages_tmp = iobuf->nr_pages; + iobuf->nr_pages = 1; + + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != blksize_snap) + goto fail_raw_write; + + + /* initialization of next COW exception table block with zeroes */ + end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; + if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) + { + /* don't go beyond the end */ + if (idx + 1 >= lv_snap->lv_remap_end) goto good_out; + + memset(lv_COW_table, 0, blksize_snap); + + if (end_of_table) + { + idx++; + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - 
lv_snap->lv_chunk_size; + blksize_snap = lvm_get_blksize(snap_phys_dev); + blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else blocks[0]++; + + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != blksize_snap) + goto fail_raw_write; + } + + + good_out: + iobuf->length = length_tmp; + iobuf->maplist[0] = page_tmp; + iobuf->nr_pages = nr_pages_tmp; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_raw_write: + reason = "write error"; + goto out; +} + /* * copy on write handler for one snapshot logical volume * @@ -200,9 +344,8 @@ lv_t * lv_snap) { const char * reason; - unsigned long org_start, snap_start, virt_start, pe_off; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; - kdev_t snap_phys_dev; struct kiobuf * iobuf; unsigned long blocks[KIO_MAX_SECTORS]; int blksize_snap, blksize_org, min_blksize, max_blksize; @@ -238,8 +381,8 @@ iobuf = lv_snap->lv_iobuf; - blksize_org = get_blksize(org_phys_dev); - blksize_snap = get_blksize(snap_phys_dev); + blksize_org = lvm_get_blksize(org_phys_dev); + blksize_snap = lvm_get_blksize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); @@ -268,7 +411,7 @@ } #ifdef DEBUG_SNAPSHOT - /* invalidate the logcial snapshot buffer cache */ + /* invalidate the logical snapshot buffer cache */ invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size, lv_snap->lv_dev); #endif @@ -277,15 +420,20 @@ so update the execption table */ lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, org_phys_dev, org_start, lv_snap); lv_snap->lv_remap_ptr = idx + 1; - return 1; + if (lv_snap->lv_snapshot_use_rate > 0) { + if (lv_snap->lv_remap_ptr * 100 / 
lv_snap->lv_remap_end >= lv_snap->lv_snapshot_use_rate) + wake_up_interruptible(&lv_snap->lv_snapshot_wait); + } + return 0; /* slow path */ out: lvm_drop_snapshot(lv_snap, reason); - return -1; + return 1; fail_out_of_space: reason = "out of space"; @@ -301,7 +449,7 @@ goto out; } -static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) { int bytes, nr_pages, err, i; @@ -312,33 +460,17 @@ goto out; err = -ENOMEM; - iobuf->locked = 1; + iobuf->locked = 0; iobuf->nr_pages = 0; for (i = 0; i < nr_pages; i++) { struct page * page; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) page = alloc_page(GFP_KERNEL); if (!page) goto out; -#else - { - unsigned long addr = __get_free_page(GFP_USER); - if (!addr) - goto out; - iobuf->pagelist[i] = addr; - page = virt_to_page(addr); - } -#endif iobuf->maplist[i] = page; - /* the only point to lock the page here is to be allowed - to share unmap_kiobuf() in the fail-path */ -#ifndef LockPage -#define LockPage(map) set_bit(PG_locked, &(map)->flags) -#endif - LockPage(page); iobuf->nr_pages++; } iobuf->offset = 0; @@ -360,7 +492,7 @@ return mem; } -static int lvm_snapshot_alloc_hash_table(lv_t * lv) +int lvm_snapshot_alloc_hash_table(lv_t * lv) { int err; unsigned long buckets, max_buckets, size; @@ -380,6 +512,7 @@ if (!hash) goto out; + lv->lv_snapshot_hash_table_size = size; lv->lv_snapshot_hash_mask = buckets-1; while (buckets--) @@ -407,12 +540,20 @@ err = lvm_snapshot_alloc_hash_table(lv_snap); if (err) goto out_free_kiovec; + + + lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL); + if (!lv_snap->lv_COW_table_page) + goto out_free_kiovec; + out: return err; out_free_kiovec: unmap_kiobuf(lv_snap->lv_iobuf); free_kiovec(1, &lv_snap->lv_iobuf); + vfree(lv_snap->lv_snapshot_hash_table); + lv_snap->lv_snapshot_hash_table = NULL; goto out; } @@ -427,10 +568,17 @@ { vfree(lv->lv_snapshot_hash_table); lv->lv_snapshot_hash_table = NULL; + 
lv->lv_snapshot_hash_table_size = 0; } if (lv->lv_iobuf) { + unmap_kiobuf(lv->lv_iobuf); free_kiovec(1, &lv->lv_iobuf); lv->lv_iobuf = NULL; + } + if (lv->lv_COW_table_page) + { + free_page((ulong)lv->lv_COW_table_page); + lv->lv_COW_table_page = NULL; } } diff -urN 2.4.0-test13-pre3/drivers/md/lvm.c 2.4.0-test13-pre3-lvm/drivers/md/lvm.c --- 2.4.0-test13-pre3/drivers/md/lvm.c Thu Dec 14 22:34:04 2000 +++ 2.4.0-test13-pre3-lvm/drivers/md/lvm.c Mon Dec 18 19:44:19 2000 @@ -1,12 +1,12 @@ /* * kernel/lvm.c * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software * * February-November 1997 * April-May,July-August,November 1998 * January-March,May,July,September,October 1999 - * January,February 2000 + * January,February,July,September-November 2000 * * * LVM driver is free software; you can redistribute it and/or modify @@ -38,7 +38,7 @@ * lvm_status_byindex_req_t vars * 04/05/1998 - added multiple device support * 08/05/1998 - added support to set/clear extendable flag in volume group - * 09/05/1998 - changed output of lvm_proc_get_info() because of + * 09/05/1998 - changed output of lvm_proc_get_global_info() because of * support for free (eg. longer) logical volume names * 12/05/1998 - added spin_locks (thanks to Pascal van Dam * ) @@ -122,18 +122,36 @@ * - avoided "/dev/" in proc filesystem output * - avoided inline strings functions lvm_strlen etc. * 14/02/2000 - support for 2.3.43 - * - integrated Andrea Arcangeli's snapshot code + * - integrated Andrea Arcagneli's snapshot code + * 25/06/2000 - james (chip) , IKKHAYD! 
roffl + * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume support + * 06/09/2000 - added devfs support + * 07/09/2000 - changed IOP version to 9 + * - started to add new char ioctl LV_STATUS_BYDEV_T to support + * getting an lv_t based on the dev_t of the Logical Volume + * 14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions + * to sync and lock, activate snapshot and unlock the FS + * (to support journaled filesystems) + * 18/09/2000 - hardsector size support + * 27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename() + * 30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO + * 01/11/2000 - added memory information on hash tables to + * lvm_proc_get_global_info() + * 02/11/2000 - implemented /proc/lvm/ hierarchy * 07/12/2000 - make sure lvm_make_request_fn returns correct value - 0 or 1 - NeilBrown * */ -static char *lvm_version = "LVM version 0.8final by Heinz Mauelshagen (15/02/2000)\n"; -static char *lvm_short_version = "version 0.8final (15/02/2000)"; +static char *lvm_version = "LVM version 0.9 by Heinz Mauelshagen (13/11/2000)\n"; +static char *lvm_short_version = "version 0.9 (13/11/2000)"; #define MAJOR_NR LVM_BLK_MAJOR #define DEVICE_OFF(device) +/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ +/* #define LVM_VFS_ENHANCEMENT */ + #include #include @@ -166,17 +184,15 @@ #include #endif -#define LOCAL_END_REQUEST - #include #include #include #include -#define LVM_CORRECT_READ_AHEAD(a) \ - (((a) < LVM_MIN_READ_AHEAD || (a) > LVM_MAX_READ_AHEAD) \ - ? 
LVM_MAX_READ_AHEAD : (a)) +#define LVM_CORRECT_READ_AHEAD( a) \ + if ( a < LVM_MIN_READ_AHEAD || \ + a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD; #ifndef WRITEA # define WRITEA WRITE @@ -195,8 +211,7 @@ static void lvm_dummy_device_request(request_queue_t *); #define DEVICE_REQUEST lvm_dummy_device_request -static int lvm_make_request_fn(request_queue_t *, int, struct buffer_head*); -static void lvm_plug_device_noop(request_queue_t *, kdev_t); +static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); @@ -205,13 +220,21 @@ static int lvm_chr_close(struct inode *, struct file *); static int lvm_blk_close(struct inode *, struct file *); +static int lvm_user_bmap(struct inode *, struct lv_bmap *); static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS -static int lvm_proc_get_info(char *, char **, off_t, int); -static int (*lvm_proc_get_info_ptr) (char *, char **, off_t, int) = -&lvm_proc_get_info; +int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *); +int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *); +int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *); +static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *); +void lvm_do_create_proc_entry_of_vg ( vg_t *); +inline void lvm_do_remove_proc_entry_of_vg ( vg_t *); +inline void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *); +inline void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *); +inline void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *); +inline void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *); #endif #ifdef LVM_HD_NAME @@ -226,10 +249,16 @@ static void lvm_init_vars(void); /* external snapshot calls */ -int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); -int lvm_snapshot_COW(kdev_t, ulong, 
ulong, ulong, lv_t *); -int lvm_snapshot_alloc(lv_t *); -void lvm_snapshot_release(lv_t *); +extern inline int lvm_get_blksize(kdev_t); +extern int lvm_snapshot_alloc(lv_t *); +extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *); +extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); +extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); +extern void lvm_snapshot_release(lv_t *); +extern int lvm_write_COW_table_block(vg_t *, lv_t *); +extern inline void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *); +extern int lvm_snapshot_alloc_hash_table(lv_t *); +extern void lvm_drop_snapshot(lv_t *, char *); #ifdef LVM_HD_NAME extern void (*lvm_hd_name_ptr) (char *, int); @@ -237,21 +266,30 @@ static int lvm_map(struct buffer_head *, int); static int lvm_do_lock_lvm(void); static int lvm_do_le_remap(vg_t *, void *); -static int lvm_do_pe_lock_unlock(vg_t *r, void *); -static int lvm_do_vg_create(int, void *); -static int lvm_do_vg_extend(vg_t *, void *); -static int lvm_do_vg_reduce(vg_t *, void *); -static int lvm_do_vg_remove(int); + +static int lvm_do_pv_create(pv_t *, vg_t *, ulong); +static int lvm_do_pv_remove(vg_t *, ulong); static int lvm_do_lv_create(int, char *, lv_t *); -static int lvm_do_lv_remove(int, char *, int); static int lvm_do_lv_extend_reduce(int, char *, lv_t *); +static int lvm_do_lv_remove(int, char *, int); +static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *); static int lvm_do_lv_status_byname(vg_t *r, void *); -static int lvm_do_lv_status_byindex(vg_t *, void *arg); +static int lvm_do_lv_status_byindex(vg_t *, void *); +static int lvm_do_lv_status_bydev(vg_t *, void *); + +static int lvm_do_pe_lock_unlock(vg_t *r, void *); + static int lvm_do_pv_change(vg_t*, void*); static int lvm_do_pv_status(vg_t *, void *); + +static int lvm_do_vg_create(int, void *); +static int lvm_do_vg_extend(vg_t *, void *); +static int lvm_do_vg_reduce(vg_t *, void *); +static int lvm_do_vg_rename(vg_t *, void *); +static 
int lvm_do_vg_remove(int); static void lvm_geninit(struct gendisk *); #ifdef LVM_GET_INODE -static struct inode *lvm_get_inode(kdev_t); +static struct inode *lvm_get_inode(int); void lvm_clear_inode(struct inode *); #endif /* END Internal function prototypes */ @@ -259,10 +297,19 @@ /* volume group descriptor area pointers */ static vg_t *vg[ABS_MAX_VG]; + +#ifdef CONFIG_DEVFS_FS +static devfs_handle_t lvm_devfs_handle; +static devfs_handle_t vg_devfs_handle[MAX_VG]; +static devfs_handle_t ch_devfs_handle[MAX_VG]; +static devfs_handle_t lv_devfs_handle[MAX_LV]; +#endif + static pv_t *pvp = NULL; static lv_t *lvp = NULL; static pe_t *pep = NULL; static pe_t *pep1 = NULL; +static char *basename = NULL; /* map from block minor number to VG and LV numbers */ @@ -287,7 +334,6 @@ static char pv_name[NAME_LEN]; /* static char rootvg[NAME_LEN] = { 0, }; */ -static uint lv_open = 0; const char *const lvm_name = LVM_NAME; static int lock = 0; static int loadtime = 0; @@ -299,27 +345,31 @@ static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait); static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static devfs_handle_t lvm_devfs_handle; -static devfs_handle_t vg_devfs_handle[MAX_VG]; -static devfs_handle_t ch_devfs_handle[MAX_VG]; -static devfs_handle_t lv_devfs_handle[MAX_LV]; +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +static struct proc_dir_entry *lvm_proc_dir = NULL; +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; +struct proc_dir_entry *pde = NULL; +#endif static struct file_operations lvm_chr_fops = { - owner: THIS_MODULE, open: lvm_chr_open, release: lvm_chr_close, ioctl: lvm_chr_ioctl, }; +#define BLOCK_DEVICE_OPERATIONS +/* block device operations structure needed for 2.3.38? 
and above */ static struct block_device_operations lvm_blk_dops = { open: lvm_blk_open, release: lvm_blk_close, - ioctl: lvm_blk_ioctl + ioctl: lvm_blk_ioctl, }; + /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; static int lvm_blocksizes[MAX_LV] = @@ -364,21 +414,32 @@ printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); return -EIO; } - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) { +#ifdef BLOCK_DEVICE_OPERATIONS + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) +#else + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) +#endif + { printk("%s -- register_blkdev failed\n", lvm_name); if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); return -EIO; } +#ifdef CONFIG_DEVFS_FS lvm_devfs_handle = devfs_register( 0 , "lvm", 0, 0, LVM_CHAR_MAJOR, S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, &lvm_chr_fops, NULL); +#endif #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS - create_proc_info_entry(LVM_NAME, S_IFREG | S_IRUGO, - &proc_root, lvm_proc_get_info_ptr); + lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); + if (lvm_proc_dir != NULL) { + lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); + if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + } #endif lvm_init_vars(); @@ -405,7 +466,7 @@ blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); - blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_plug_device_noop); + /* optional read root VGDA */ /* if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); @@ -433,7 +494,9 @@ { struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; +#ifdef CONFIG_DEVFS_FS devfs_unregister (lvm_devfs_handle); +#endif if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { printk(KERN_ERR "%s -- 
unregister_chrdev failed\n", lvm_name); @@ -456,9 +519,12 @@ blk_size[MAJOR_NR] = NULL; blksize_size[MAJOR_NR] = NULL; + hardsect_size[MAJOR_NR] = NULL; #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS - remove_proc_entry(LVM_NAME, &proc_root); + remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); + remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); + remove_proc_entry(LVM_DIR, &proc_root); #endif #ifdef LVM_HD_NAME @@ -486,8 +552,11 @@ loadtime = CURRENT_TIME; + lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; + pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = pe_lock_req.data.pv_dev = 0; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ pe_lock_req.data.pv_offset = 0; /* Initialize VG pointers */ @@ -531,6 +600,9 @@ if (VG_CHR(minor) > MAX_VG) return -ENXIO; lvm_chr_open_count++; + + MOD_INC_USE_COUNT; + return 0; } /* lvm_chr_open() */ @@ -592,7 +664,7 @@ MOD_INC_USE_COUNT; while (GET_USE_COUNT(&__this_module) > 1) MOD_DEC_USE_COUNT; -#endif /* MODULE */ +#endif /* MODULE */ lock = 0; /* release lock */ wake_up_interruptible(&lvm_wait); return 0; @@ -612,17 +684,21 @@ /* create a VGDA */ return lvm_do_vg_create(minor, arg); - case VG_REMOVE: - /* remove an inactive VGDA */ - return lvm_do_vg_remove(minor); - case VG_EXTEND: /* extend a volume group */ - return lvm_do_vg_extend(vg_ptr,arg); + return lvm_do_vg_extend(vg_ptr, arg); case VG_REDUCE: /* reduce a volume group */ - return lvm_do_vg_reduce(vg_ptr,arg); + return lvm_do_vg_reduce(vg_ptr, arg); + + case VG_RENAME: + /* rename a volume group */ + return lvm_do_vg_rename(vg_ptr, arg); + + case VG_REMOVE: + /* remove an inactive VGDA */ + return lvm_do_vg_remove(minor); case VG_SET_EXTENDABLE: @@ -660,20 +736,22 @@ /* get volume group count */ for (l = v = 0; v < ABS_MAX_VG; v++) { if (vg[v] != NULL) { - if (copy_to_user(arg + l++ * NAME_LEN, + if (copy_to_user(arg + l * NAME_LEN, vg[v]->vg_name, NAME_LEN) != 0) return -EFAULT; + l++; } } return 0; case LV_CREATE: - case LV_REMOVE: case 
LV_EXTEND: case LV_REDUCE: - /* create, remove, extend or reduce a logical volume */ + case LV_REMOVE: + case LV_RENAME: + /* create, extend, reduce, remove or rename a logical volume */ if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0) return -EFAULT; @@ -686,52 +764,54 @@ case LV_CREATE: return lvm_do_lv_create(minor, lv_req.lv_name, &lv); - case LV_REMOVE: - return lvm_do_lv_remove(minor, lv_req.lv_name, -1); - case LV_EXTEND: case LV_REDUCE: return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv); + case LV_REMOVE: + return lvm_do_lv_remove(minor, lv_req.lv_name, -1); + + case LV_RENAME: + return lvm_do_lv_rename(vg_ptr, &lv_req, &lv); } + + case LV_STATUS_BYNAME: /* get status of a logical volume by name */ - return lvm_do_lv_status_byname(vg_ptr,arg); + return lvm_do_lv_status_byname(vg_ptr, arg); + case LV_STATUS_BYINDEX: /* get status of a logical volume by index */ - return lvm_do_lv_status_byindex(vg_ptr,arg); + return lvm_do_lv_status_byindex(vg_ptr, arg); + + + case LV_STATUS_BYDEV: + return lvm_do_lv_status_bydev(vg_ptr, arg); + case PV_CHANGE: /* change a physical volume */ return lvm_do_pv_change(vg_ptr,arg); + case PV_STATUS: /* get physical volume data (pv_t structure only) */ return lvm_do_pv_status(vg_ptr,arg); + case PV_FLUSH: /* physical volume buffer flush/invalidate */ if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0) return -EFAULT; - for ( v = 0; v < ABS_MAX_VG; v++) { - unsigned int p; - if ( vg[v] == NULL) continue; - for ( p = 0; p < vg[v]->pv_max; p++) { - if ( vg[v]->pv[p] != NULL && - strcmp ( vg[v]->pv[p]->pv_name, - pv_flush_req.pv_name) == 0) { - fsync_dev ( vg[v]->pv[p]->pv_dev); - invalidate_buffers ( vg[v]->pv[p]->pv_dev); - return 0; - } - } - } + fsync_dev(pv_flush_req.pv_dev); + invalidate_buffers(pv_flush_req.pv_dev); return 0; + default: printk(KERN_WARNING "%s -- lvm_chr_ioctl: unknown command %x\n", @@ -754,11 +834,10 @@ "%s -- lvm_chr_close VG#: %d\n", lvm_name, 
VG_CHR(minor)); #endif - lock_kernel(); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) { lvm_reset_spindown = 0; - lvm_chr_open_count = 1; + lvm_chr_open_count = 0; } #endif @@ -767,7 +846,8 @@ lock = 0; /* release lock */ wake_up_interruptible(&lvm_wait); } - unlock_kernel(); + + MOD_DEC_USE_COUNT; return 0; } /* lvm_chr_close() */ @@ -815,6 +895,10 @@ if (!(lv_ptr->lv_access & LV_WRITE)) return -EACCES; } +#ifndef BLOCK_DEVICE_OPERATIONS + file->f_op = &lvm_blk_fops; +#endif + /* be sure to increment VG counter */ if (lv_ptr->lv_open == 0) vg_ptr->lv_open++; lv_ptr->lv_open++; @@ -863,7 +947,7 @@ lvm_name, lv_ptr->lv_size); #endif if (put_user(lv_ptr->lv_size, (long *)arg)) - return -EFAULT; + return -EFAULT; break; @@ -892,7 +976,7 @@ if ((long) arg < LVM_MIN_READ_AHEAD || (long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = (long) arg; + lv_ptr->lv_read_ahead = (long) arg; break; @@ -944,6 +1028,10 @@ /* set access flags of a logical volume */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; lv_ptr->lv_access = (ulong) arg; + if ( lv_ptr->lv_access & LV_WRITE) + set_device_ro(lv_ptr->lv_dev, 0); + else + set_device_ro(lv_ptr->lv_dev, 1); break; @@ -955,6 +1043,10 @@ lv_ptr->lv_status = (ulong) arg; break; + case LV_BMAP: + /* turn logical block into (dev_t, block). non privileged. 
*/ + return lvm_user_bmap(inode, (struct lv_bmap *) arg); + break; case LV_SET_ALLOCATION: /* set allocation flags of a logical volume */ @@ -962,6 +1054,37 @@ lv_ptr->lv_allocation = (ulong) arg; break; + case LV_SNAPSHOT_USE_RATE: + if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM; + { + lv_snapshot_use_rate_req_t lv_snapshot_use_rate_req; + + if (copy_from_user(&lv_snapshot_use_rate_req, arg, + sizeof(lv_snapshot_use_rate_req_t))) + return -EFAULT; + if (lv_snapshot_use_rate_req.rate < 0 || + lv_snapshot_use_rate_req.rate > 100) return -EFAULT; + + switch (lv_snapshot_use_rate_req.block) + { + case 0: + lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate; + if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate) + interruptible_sleep_on (&lv_ptr->lv_snapshot_wait); + break; + + case O_NONBLOCK: + break; + + default: + return -EFAULT; + } + lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end; + if (copy_to_user(arg, &lv_snapshot_use_rate_req, + sizeof(lv_snapshot_use_rate_req_t))) + return -EFAULT; + } + break; default: printk(KERN_WARNING @@ -999,20 +1122,163 @@ } /* lvm_blk_close() */ +static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) +{ + struct buffer_head bh; + unsigned long block; + int err; + + if (get_user(block, &user_result->lv_block)) + return -EFAULT; + + memset(&bh,0,sizeof bh); + bh.b_rsector = block; + bh.b_dev = bh.b_rdev = inode->i_dev; + bh.b_size = lvm_get_blksize(bh.b_dev); + if ((err=lvm_map(&bh, READ)) < 0) { + printk("lvm map failed: %d\n", err); + return -EINVAL; + } + + return put_user( kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || + put_user(bh.b_rsector, &user_result->lv_block) ? 
-EFAULT : 0; +} + + +/* + * provide VG info for proc filesystem use (global) + */ +int lvm_vg_info(vg_t *vg_ptr, char *buf) { + int sz = 0; + char inactive_flag = ' '; + + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; + sz = sprintf(buf, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg_ptr->vg_name, + vg_ptr->pv_cur, + vg_ptr->lv_cur, + vg_ptr->lv_open, + vg_ptr->pe_size >> 1, + vg_ptr->pe_size * vg_ptr->pe_total >> 1, + vg_ptr->pe_total, + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, + vg_ptr->pe_allocated, + (vg_ptr->pe_total - vg_ptr->pe_allocated) * + vg_ptr->pe_size >> 1, + vg_ptr->pe_total - vg_ptr->pe_allocated); + return sz; +} + + +/* + * provide LV info for proc filesystem use (global) + */ +int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' ', + stripes_flag = ' ', rw_flag = ' '; + + if (!(lv_ptr->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if (lv_ptr->lv_access & LV_WRITE) + rw_flag = 'W'; + allocation_flag = 'D'; + if (lv_ptr->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if (lv_ptr->lv_stripes > 1) + stripes_flag = 'S'; + sz += sprintf(buf+sz, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if (lv_ptr->lv_stripes > 1) + sz += sprintf(buf+sz, "%-2d", + lv_ptr->lv_stripes); + else + sz += sprintf(buf+sz, " "); + basename = strrchr(lv_ptr->lv_name, '/'); + if ( basename == 0) basename = lv_ptr->lv_name; + else basename++; + sz += sprintf(buf+sz, "] %-25s", basename); + if (strlen(basename) > 25) + sz += sprintf(buf+sz, + "\n "); + sz += sprintf(buf+sz, "%9d /%-6d ", + lv_ptr->lv_size >> 1, + lv_ptr->lv_size / vg_ptr->pe_size); + + if (lv_ptr->lv_open == 0) + sz += sprintf(buf+sz, "close"); + else + sz += sprintf(buf+sz, "%dx open", + lv_ptr->lv_open); + + return sz; +} + + +/* + * provide PV 
info for proc filesystem use (global) + */ +int lvm_pv_info(pv_t *pv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' '; + char *pv_name = NULL; + + if (!(pv_ptr->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + pv_name = strrchr(pv_ptr->pv_name+1,'/'); + if ( pv_name == 0) pv_name = pv_ptr->pv_name; + else pv_name++; + sz = sprintf(buf, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + pv_name, + pv_ptr->pe_total * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total, + pv_ptr->pe_allocated * + pv_ptr->pe_size >> 1, + pv_ptr->pe_allocated, + (pv_ptr->pe_total - + pv_ptr->pe_allocated) * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total - + pv_ptr->pe_allocated); + return sz; +} + + #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS /* - * Support function /proc-Filesystem + * Support functions /proc-Filesystem */ + #define LVM_PROC_BUF ( i == 0 ? 
dummy_buf : &buf[sz]) -static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) +/* + * provide global LVM information + */ +static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data) { int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, - lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; static off_t sz; off_t sz_last; - char allocation_flag, inactive_flag, rw_flag, stripes_flag; - char *lv_name, *pv_name; static char *buf = NULL; static char dummy_buf[160]; /* sized for 2 lines */ vg_t *vg_ptr; @@ -1022,13 +1288,16 @@ #ifdef DEBUG_LVM_PROC_GET_INFO printk(KERN_DEBUG - "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n", lvm_name, pos, count, whence); #endif + MOD_INC_USE_COUNT; + if (pos == 0 || buf == NULL) { sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ - lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + lv_open_total = pe_t_bytes = hash_table_bytes = \ + lv_block_exception_t_bytes = 0; /* search for activity */ for (v = 0; v < ABS_MAX_VG; v++) { @@ -1040,6 +1309,7 @@ for (l = 0; l < vg[v]->lv_max; l++) { if ((lv_ptr = vg_ptr->lv[l]) != NULL) { pe_t_bytes += lv_ptr->lv_allocated_le; + hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; if (lv_ptr->lv_block_exception != NULL) lv_block_exception_t_bytes += lv_ptr->lv_remap_end; if (lv_ptr->lv_open > 0) { @@ -1057,9 +1327,11 @@ if (buf != NULL) { #ifdef DEBUG_KFREE printk(KERN_DEBUG - "%s -- kfree %d\n", lvm_name, __LINE__); + "%s -- vfree %d\n", lvm_name, __LINE__); #endif - kfree(buf); + lock_kernel(); + vfree(buf); + unlock_kernel(); buf = NULL; } /* 2 times: first to get size to allocate buffer, @@ -1094,7 +1366,7 @@ vg_counter * sizeof(vg_t) + pv_counter * sizeof(pv_t) + lv_counter * sizeof(lv_t) + - 
pe_t_bytes + lv_block_exception_t_bytes + sz_last, + pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, lvm_iop_version); seconds = CURRENT_TIME - loadtime; @@ -1115,26 +1387,7 @@ for (v = 0; v < ABS_MAX_VG; v++) { /* volume group */ if ((vg_ptr = vg[v]) != NULL) { - inactive_flag = ' '; - if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; - sz += sprintf(LVM_PROC_BUF, - "\nVG: %c%s [%d PV, %d LV/%d open] " - " PE Size: %d KB\n" - " Usage [KB/PE]: %d /%d total " - "%d /%d used %d /%d free", - inactive_flag, - vg_ptr->vg_name, - vg_ptr->pv_cur, - vg_ptr->lv_cur, - vg_ptr->lv_open, - vg_ptr->pe_size >> 1, - vg_ptr->pe_size * vg_ptr->pe_total >> 1, - vg_ptr->pe_total, - vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, - vg_ptr->pe_allocated, - (vg_ptr->pe_total - vg_ptr->pe_allocated) * - vg_ptr->pe_size >> 1, - vg_ptr->pe_total - vg_ptr->pe_allocated); + sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF); /* physical volumes */ sz += sprintf(LVM_PROC_BUF, @@ -1143,32 +1396,8 @@ c = 0; for (p = 0; p < vg_ptr->pv_max; p++) { if ((pv_ptr = vg_ptr->pv[p]) != NULL) { - inactive_flag = 'A'; - if (!(pv_ptr->pv_status & PV_ACTIVE)) - inactive_flag = 'I'; - allocation_flag = 'A'; - if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) - allocation_flag = 'N'; - pv_name = strchr(pv_ptr->pv_name+1,'/'); - if ( pv_name == 0) pv_name = pv_ptr->pv_name; - else pv_name++; - sz += sprintf(LVM_PROC_BUF, - "[%c%c] %-21s %8d /%-6d " - "%8d /%-6d %8d /%-6d", - inactive_flag, - allocation_flag, - pv_name, - pv_ptr->pe_total * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total, - pv_ptr->pe_allocated * - pv_ptr->pe_size >> 1, - pv_ptr->pe_allocated, - (pv_ptr->pe_total - - pv_ptr->pe_allocated) * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total - - pv_ptr->pe_allocated); + sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF); + c++; if (c < vg_ptr->pv_cur) sz += sprintf(LVM_PROC_BUF, @@ -1181,47 +1410,9 @@ "\n LV%s ", vg_ptr->lv_cur == 1 ? 
": " : "s:"); c = 0; - for (l = 0; l < vg[v]->lv_max; l++) { + for (l = 0; l < vg_ptr->lv_max; l++) { if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - inactive_flag = 'A'; - if (!(lv_ptr->lv_status & LV_ACTIVE)) - inactive_flag = 'I'; - rw_flag = 'R'; - if (lv_ptr->lv_access & LV_WRITE) - rw_flag = 'W'; - allocation_flag = 'D'; - if (lv_ptr->lv_allocation & LV_CONTIGUOUS) - allocation_flag = 'C'; - stripes_flag = 'L'; - if (lv_ptr->lv_stripes > 1) - stripes_flag = 'S'; - sz += sprintf(LVM_PROC_BUF, - "[%c%c%c%c", - inactive_flag, - rw_flag, - allocation_flag, - stripes_flag); - if (lv_ptr->lv_stripes > 1) - sz += sprintf(LVM_PROC_BUF, "%-2d", - lv_ptr->lv_stripes); - else - sz += sprintf(LVM_PROC_BUF, " "); - lv_name = strrchr(lv_ptr->lv_name, '/'); - if ( lv_name == 0) lv_name = lv_ptr->lv_name; - else lv_name++; - sz += sprintf(LVM_PROC_BUF, "] %-25s", lv_name); - if (strlen(lv_name) > 25) - sz += sprintf(LVM_PROC_BUF, - "\n "); - sz += sprintf(LVM_PROC_BUF, "%9d /%-6d ", - lv_ptr->lv_size >> 1, - lv_ptr->lv_size / vg[v]->pe_size); - - if (lv_ptr->lv_open == 0) - sz += sprintf(LVM_PROC_BUF, "close"); - else - sz += sprintf(LVM_PROC_BUF, "%dx open", - lv_ptr->lv_open); + sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); c++; if (c < vg_ptr->lv_cur) sz += sprintf(LVM_PROC_BUF, @@ -1234,8 +1425,12 @@ } } if (buf == NULL) { - if ((buf = vmalloc(sz)) == NULL) { + lock_kernel(); + buf = vmalloc(sz); + unlock_kernel(); + if (buf == NULL) { sz = 0; + MOD_DEC_USE_COUNT; return sprintf(page, "%s - vmalloc error at line %d\n", lvm_name, __LINE__); } @@ -1243,8 +1438,11 @@ sz_last = sz; } } + MOD_DEC_USE_COUNT; if (pos > sz - 1) { + lock_kernel(); vfree(buf); + unlock_kernel(); buf = NULL; return 0; } @@ -1253,47 +1451,111 @@ return sz - pos; else return count; -} /* lvm_proc_get_info() */ +} /* lvm_proc_get_global_info() */ #endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ /* + * provide VG information + */ +int lvm_proc_read_vg_info(char *page, char **start, 
off_t off, + int count, int *eof, void *data) { + int sz = 0; + vg_t *vg = data; + + sz += sprintf ( page+sz, "name: %s\n", vg->vg_name); + sz += sprintf ( page+sz, "size: %u\n", + vg->pe_total * vg->pe_size / 2); + sz += sprintf ( page+sz, "access: %u\n", vg->vg_access); + sz += sprintf ( page+sz, "status: %u\n", vg->vg_status); + sz += sprintf ( page+sz, "number: %u\n", vg->vg_number); + sz += sprintf ( page+sz, "LV max: %u\n", vg->lv_max); + sz += sprintf ( page+sz, "LV current: %u\n", vg->lv_cur); + sz += sprintf ( page+sz, "LV open: %u\n", vg->lv_open); + sz += sprintf ( page+sz, "PV max: %u\n", vg->pv_max); + sz += sprintf ( page+sz, "PV current: %u\n", vg->pv_cur); + sz += sprintf ( page+sz, "PV active: %u\n", vg->pv_act); + sz += sprintf ( page+sz, "PE size: %u\n", vg->pe_size / 2); + sz += sprintf ( page+sz, "PE total: %u\n", vg->pe_total); + sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated); + sz += sprintf ( page+sz, "uuid: %s\n", vg->vg_uuid); + + return sz; +} + + +/* + * provide LV information + */ +int lvm_proc_read_lv_info(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + lv_t *lv = data; + + sz += sprintf ( page+sz, "name: %s\n", lv->lv_name); + sz += sprintf ( page+sz, "size: %u\n", lv->lv_size); + sz += sprintf ( page+sz, "access: %u\n", lv->lv_access); + sz += sprintf ( page+sz, "status: %u\n", lv->lv_status); + sz += sprintf ( page+sz, "number: %u\n", lv->lv_number); + sz += sprintf ( page+sz, "open: %u\n", lv->lv_open); + sz += sprintf ( page+sz, "allocation: %u\n", lv->lv_allocation); + sz += sprintf ( page+sz, "device: %02u:%02u\n", + MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); + + return sz; +} + + +/* + * provide PV information + */ +int lvm_proc_read_pv_info(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + pv_t *pv = data; + + sz += sprintf ( page+sz, "name: %s\n", pv->pv_name); + sz += sprintf ( page+sz, "size: %u\n", pv->pv_size); + sz += sprintf 
( page+sz, "status: %u\n", pv->pv_status); + sz += sprintf ( page+sz, "number: %u\n", pv->pv_number); + sz += sprintf ( page+sz, "allocatable: %u\n", pv->pv_allocatable); + sz += sprintf ( page+sz, "LV current: %u\n", pv->lv_cur); + sz += sprintf ( page+sz, "PE size: %u\n", pv->pe_size / 2); + sz += sprintf ( page+sz, "PE total: %u\n", pv->pe_total); + sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated); + sz += sprintf ( page+sz, "device: %02u:%02u\n", + MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); + sz += sprintf ( page+sz, "uuid: %s\n", pv->pv_uuid); + + + return sz; +} + + +/* * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c * (see init_module/lvm_init) */ static int lvm_map(struct buffer_head *bh, int rw) { - int minor = MINOR(bh->b_rdev); + int minor = MINOR(bh->b_dev); + int ret = 0; ulong index; ulong pe_start; ulong size = bh->b_size >> 9; - ulong rsector_tmp = bh->b_rsector; + ulong rsector_tmp = bh->b_blocknr * size; ulong rsector_sav; - kdev_t rdev_tmp = bh->b_rdev; + kdev_t rdev_tmp = bh->b_dev; kdev_t rdev_sav; - lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + vg_t *vg_this = vg[VG_BLK(minor)]; + lv_t *lv = vg_this->lv[LV_BLK(minor)]; if (!(lv->lv_status & LV_ACTIVE)) { printk(KERN_ALERT "%s - lvm_map: ll_rw_blk for inactive LV %s\n", lvm_name, lv->lv_name); - goto error; - } -/* - if ( lv->lv_access & LV_SNAPSHOT) - printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); - */ - - /* take care of snapshot chunk writes before - check for writable logical volume */ - if ((lv->lv_access & LV_SNAPSHOT) && - MAJOR(bh->b_rdev) != 0 && - MAJOR(bh->b_rdev) != MAJOR_NR && - (rw == WRITEA || rw == WRITE)) - { - printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_rdev), MINOR ( bh->b_rdev), bh->b_blocknr, bh->b_rsector); - goto error; + return -1; } if ((rw 
== WRITE || rw == WRITEA) && @@ -1301,7 +1563,7 @@ printk(KERN_CRIT "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", lvm_name, lv->lv_name); - goto error; + return -1; } #ifdef DEBUG_MAP printk(KERN_DEBUG @@ -1315,9 +1577,10 @@ if (rsector_tmp + size > lv->lv_size) { printk(KERN_ALERT - "%s - lvm_map *rsector: %lu or size: %lu wrong for" - " minor: %2d\n", lvm_name, rsector_tmp, size, minor); - goto error; + "%s - lvm_map access beyond end of device; *rsector: " + "%lu or size: %lu wrong for minor: %2d\n", + lvm_name, rsector_tmp, size, minor); + return -1; } rsector_sav = rsector_tmp; rdev_sav = rdev_tmp; @@ -1326,10 +1589,10 @@ /* linear mapping */ if (lv->lv_stripes < 2) { /* get the index */ - index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; + index = rsector_tmp / vg_this->pe_size; pe_start = lv->lv_current_pe[index].pe; rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % vg[VG_BLK(minor)]->pe_size); + (rsector_tmp % vg_this->pe_size); rdev_tmp = lv->lv_current_pe[index].dev; #ifdef DEBUG_MAP @@ -1347,7 +1610,7 @@ ulong stripe_index; ulong stripe_length; - stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + stripe_length = vg_this->pe_size * lv->lv_stripes; stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize; index = rsector_tmp / stripe_length + (stripe_index % lv->lv_stripes) * @@ -1379,7 +1642,7 @@ if (rdev_tmp == pe_lock_req.data.pv_dev && rsector_tmp >= pe_lock_req.data.pv_offset && rsector_tmp < (pe_lock_req.data.pv_offset + - vg[VG_BLK(minor)]->pe_size)) { + vg_this->pe_size)) { sleep_on(&lvm_map_wait); rsector_tmp = rsector_sav; rdev_tmp = rdev_sav; @@ -1393,7 +1656,7 @@ lv->lv_current_pe[index].reads++; /* snapshot volume exception handling on physical device address base */ - if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) { /* original logical volume */ if (lv->lv_access & LV_SNAPSHOT_ORG) { if (rw == WRITE || rw == WRITEA) @@ -1404,6 +1667,8 @@ for 
(lv_ptr = lv->lv_snapshot_next; lv_ptr != NULL; lv_ptr = lv_ptr->lv_snapshot_next) { + /* Check for inactive snapshot */ + if (!(lv_ptr->lv_status & LV_ACTIVE)) continue; down(&lv->lv_snapshot_org->lv_snapshot_sem); /* do we still have exception storage for this snapshot free? */ if (lv_ptr->lv_block_exception != NULL) { @@ -1414,11 +1679,13 @@ pe_start, lv_ptr)) { /* create a new mapping */ - lvm_snapshot_COW(rdev_tmp, - rsector_tmp, - pe_start, - rsector_sav, - lv_ptr); + if (!(ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr))) + ret = lvm_write_COW_table_block(vg_this, + lv_ptr); } rdev_tmp = rdev_sav; rsector_tmp = rsector_sav; @@ -1437,11 +1704,7 @@ bh->b_rdev = rdev_tmp; bh->b_rsector = rsector_tmp; - return 1; - - error: - buffer_IO_error(bh); - return -1; + return ret; } /* lvm_map() */ @@ -1487,7 +1750,9 @@ /* * make request function */ -static int lvm_make_request_fn(request_queue_t *q, int rw, struct buffer_head *bh) +static int lvm_make_request_fn(request_queue_t *q, + int rw, + struct buffer_head *bh) { if (lvm_map(bh, rw)<0) return 0; /* failure, buffer_IO_error has been called, don't recurse */ @@ -1495,12 +1760,6 @@ return 1; /* all ok, mapping done, call lower level driver */ } -/* - * plug device function is a noop because plugging has to happen - * in the queue of the physical blockdevice to allow the - * elevator to do a better job. 
- */ -static void lvm_plug_device_noop(request_queue_t *q, kdev_t dev) { } /******************************************************************** * @@ -1563,7 +1822,8 @@ case UNLOCK_PE: pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = pe_lock_req.data.pv_dev = 0; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ pe_lock_req.data.pv_offset = 0; wake_up(&lvm_map_wait); break; @@ -1593,8 +1853,7 @@ if (lv_ptr != NULL && strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) { - for (le = 0; le < lv_ptr->lv_allocated_le; - le++) { + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { if (lv_ptr->lv_current_pe[le].dev == le_remap_req.old_dev && lv_ptr->lv_current_pe[le].pe == @@ -1618,12 +1877,11 @@ */ int lvm_do_vg_create(int minor, void *arg) { - int snaporg_minor = 0; - ulong l, p; + int ret = 0; + ulong l, ls = 0, p, size; lv_t lv; vg_t *vg_ptr; - pv_t *pv_ptr; - lv_t *lv_ptr; + lv_t **snap_lv_ptr; if (vg[VG_CHR(minor)] != NULL) return -EPERM; @@ -1639,18 +1897,11 @@ return -EFAULT; } - vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); - ch_devfs_handle[vg_ptr->vg_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number] , "group", - DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); - /* we are not that active so far... 
*/ vg_ptr->vg_status &= ~VG_ACTIVE; vg[VG_CHR(minor)] = vg_ptr; - vg[VG_CHR(minor)]->pe_allocated = 0; + if (vg_ptr->pv_max > ABS_MAX_PV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_PV too small\n", @@ -1667,38 +1918,30 @@ vg_ptr = NULL; return -EPERM; } + /* get the physical volume structures */ vg_ptr->pv_act = vg_ptr->pv_cur = 0; for (p = 0; p < vg_ptr->pv_max; p++) { /* user space address */ if ((pvp = vg_ptr->pv[p]) != NULL) { - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); - if (pv_ptr == NULL) { - printk(KERN_CRIT - "%s -- VG_CREATE: kmalloc error PV at line %d\n", - lvm_name, __LINE__); - lvm_do_vg_remove(minor); - return -ENOMEM; - } - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + ret = lvm_do_pv_create(pvp, vg_ptr, p); + if ( ret != 0) { lvm_do_vg_remove(minor); - return -EFAULT; + return ret; } - /* We don't need the PE list - in kernel space as with LVs pe_t list (see below) */ - pv_ptr->pe = NULL; - pv_ptr->pe_allocated = 0; - pv_ptr->pv_status = PV_ACTIVE; - vg_ptr->pv_act++; - vg_ptr->pv_cur++; - -#ifdef LVM_GET_INODE - /* insert a dummy inode for fs_may_mount */ - pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); -#endif } } + size = vg_ptr->lv_max * sizeof(lv_t *); + if ((snap_lv_ptr = vmalloc ( size)) == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n", + lvm_name, __LINE__); + lvm_do_vg_remove(minor); + return -EFAULT; + } + memset(snap_lv_ptr, 0, size); + /* get the logical volume structures */ vg_ptr->lv_cur = 0; for (l = 0; l < vg_ptr->lv_max; l++) { @@ -1708,7 +1951,14 @@ lvm_do_vg_remove(minor); return -EFAULT; } + if ( lv.lv_access & LV_SNAPSHOT) { + snap_lv_ptr[ls] = lvp; + vg_ptr->lv[l] = NULL; + ls++; + continue; + } vg_ptr->lv[l] = NULL; + /* only create original logical volumes for now */ if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { lvm_do_vg_remove(minor); return -EFAULT; @@ -1718,55 +1968,41 @@ /* Second path to correct snapshot logical volumes which are 
not in place during first path above */ - for (l = 0; l < vg_ptr->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL && - vg_ptr->lv[l]->lv_access & LV_SNAPSHOT) { - snaporg_minor = lv_ptr->lv_snapshot_minor; - if (vg_ptr->lv[LV_BLK(snaporg_minor)] != NULL) { - /* get pointer to original logical volume */ - lv_ptr = vg_ptr->lv[l]->lv_snapshot_org = - vg_ptr->lv[LV_BLK(snaporg_minor)]; - - /* set necessary fields of original logical volume */ - lv_ptr->lv_access |= LV_SNAPSHOT_ORG; - lv_ptr->lv_snapshot_minor = 0; - lv_ptr->lv_snapshot_org = lv_ptr; - lv_ptr->lv_snapshot_prev = NULL; + for (l = 0; l < ls; l++) { + lvp = snap_lv_ptr[l]; + if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + } - /* find last snapshot logical volume in the chain */ - while (lv_ptr->lv_snapshot_next != NULL) - lv_ptr = lv_ptr->lv_snapshot_next; +#ifdef CONFIG_DEVFS_FS + vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); + ch_devfs_handle[vg_ptr->vg_number] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number] , "group", + DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); +#endif - /* set back pointer to this last one in our new logical volume */ - vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr; +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_vg ( vg_ptr); +#endif - /* last logical volume now points to our new snapshot volume */ - lv_ptr->lv_snapshot_next = vg_ptr->lv[l]; + vfree(snap_lv_ptr); - /* now point to the new one */ - lv_ptr = lv_ptr->lv_snapshot_next; + vg_count++; - /* set necessary fields of new snapshot logical volume */ - lv_ptr->lv_snapshot_next = NULL; - lv_ptr->lv_current_pe = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_pe; - lv_ptr->lv_allocated_le = - 
vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; - lv_ptr->lv_current_le = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_le; - lv_ptr->lv_size = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_size; - } - } - } - vg_count++; + MOD_INC_USE_COUNT; /* let's go active */ vg_ptr->vg_status |= VG_ACTIVE; - MOD_INC_USE_COUNT; - return 0; } /* lvm_do_vg_create() */ @@ -1776,26 +2012,18 @@ */ static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg) { + int ret = 0; uint p; pv_t *pv_ptr; if (vg_ptr == NULL) return -ENXIO; if (vg_ptr->pv_cur < vg_ptr->pv_max) { for (p = 0; p < vg_ptr->pv_max; p++) { - if (vg_ptr->pv[p] == NULL) { - if ((pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL)) == NULL) { - printk(KERN_CRIT - "%s -- VG_EXTEND: kmalloc error PV at line %d\n", - lvm_name, __LINE__); - return -ENOMEM; - } - if (copy_from_user(pv_ptr, arg, sizeof(pv_t)) != 0) { - kfree(pv_ptr); - vg_ptr->pv[p] = NULL; - return -EFAULT; - } + if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) { + ret = lvm_do_pv_create(arg, vg_ptr, p); + lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); + if ( ret != 0) return ret; - pv_ptr->pv_status = PV_ACTIVE; /* We don't need the PE list in kernel space like LVs pe_t list */ pv_ptr->pe = NULL; @@ -1818,8 +2046,7 @@ /* * character device support function VGDA reduce */ -static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) -{ +static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) { uint p; pv_t *pv_ptr; @@ -1837,10 +2064,7 @@ pv_ptr->pe_total; vg_ptr->pv_cur--; vg_ptr->pv_act--; -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); + lvm_do_pv_remove(vg_ptr, p); /* Make PV pointer array contiguous */ for (; p < vg_ptr->pv_max - 1; p++) vg_ptr->pv[p] = vg_ptr->pv[p + 1]; @@ -1853,6 +2077,53 @@ /* + * character device support function VG rename + */ +static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg) +{ + int l = 0, p = 0, len = 0; + char vg_name[NAME_LEN] = { 0,}; + char lv_name[NAME_LEN] = { 0,}; + char *ptr = NULL; + lv_t *lv_ptr = 
NULL; + pv_t *pv_ptr = NULL; + + if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0) + return -EFAULT; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_vg ( vg_ptr); +#endif + + strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1); + for ( l = 0; l < vg_ptr->lv_max; l++) + { + if ((lv_ptr = vg_ptr->lv[l]) == NULL) continue; + strncpy(lv_ptr->vg_name, vg_name, sizeof ( vg_name)); + ptr = strrchr(lv_ptr->lv_name, '/'); + if (ptr == NULL) ptr = lv_ptr->lv_name; + strncpy(lv_name, ptr, sizeof ( lv_name)); + len = sizeof(LVM_DIR_PREFIX); + strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX); + strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len); + len += strlen ( vg_name); + strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len); + } + for ( p = 0; p < vg_ptr->pv_max; p++) + { + if ( (pv_ptr = vg_ptr->pv[p]) == NULL) continue; + strncpy(pv_ptr->vg_name, vg_name, NAME_LEN); + } + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_vg ( vg_ptr); +#endif + + return 0; +} /* lvm_do_vg_rename */ + + +/* * character device support function VGDA remove */ static int lvm_do_vg_remove(int minor) @@ -1873,9 +2144,6 @@ /* let's go inactive */ vg_ptr->vg_status &= ~VG_ACTIVE; - devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); - devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); - /* free LVs */ /* first free snapshot logical volumes */ for (i = 0; i < vg_ptr->lv_max; i++) { @@ -1902,17 +2170,23 @@ printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); #endif -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); - vg[VG_CHR(minor)]->pv[i] = NULL; + lvm_do_pv_remove(vg_ptr, i); } } +#ifdef CONFIG_DEVFS_FS + devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); + devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_vg ( vg_ptr); +#endif + #ifdef DEBUG_KFREE printk(KERN_DEBUG "%s 
-- kfree %d\n", lvm_name, __LINE__); #endif + kfree(vg_ptr); vg[VG_CHR(minor)] = NULL; @@ -1925,13 +2199,68 @@ /* + * character device support function physical volume create + */ +static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) { + pv_t *pv_ptr = NULL; + + pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); + if (pv_ptr == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + return -EFAULT; + } + /* We don't need the PE list + in kernel space as with LVs pe_t list (see below) */ + pv_ptr->pe = NULL; + pv_ptr->pe_allocated = 0; + pv_ptr->pv_status = PV_ACTIVE; + vg_ptr->pv_act++; + vg_ptr->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); +#endif + + return 0; +} /* lvm_do_pv_create() */ + + +/* + * character device support function physical volume create + */ +static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) { + pv_t *pv_ptr = vg_ptr->pv[p]; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr); +#endif + vg_ptr->pe_total -= + pv_ptr->pe_total; + vg_ptr->pv_cur--; + vg_ptr->pv_act--; +#ifdef LVM_GET_INODE + lvm_clear_inode(pv_ptr->inode); +#endif + kfree(pv_ptr); + vg_ptr->pv[p] = NULL; + + return 0; +} + + +/* * character device support function logical volume create */ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) { - int l, le, l_new, p, size; + int e, ret, l, le, l_new, p, size; ulong lv_status_save; - char *lv_tmp, *lv_buf = NULL; lv_block_exception_t *lvbe = lv->lv_block_exception; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr = NULL; @@ -1946,7 +2275,7 @@ return -EEXIST; } - /* in case of lv_remove(), lv_create() pair; for eg. 
lvrename does this */ + /* in case of lv_remove(), lv_create() pair */ l_new = -1; if (vg_ptr->lv[lv->lv_number] == NULL) l_new = lv->lv_number; @@ -1957,7 +2286,7 @@ } } if (l_new == -1) return -EPERM; - else l = l_new; + else l = l_new; if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {; printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", @@ -1970,10 +2299,16 @@ lv_status_save = lv_ptr->lv_status; lv_ptr->lv_status &= ~LV_ACTIVE; lv_ptr->lv_snapshot_org = \ - lv_ptr->lv_snapshot_prev = \ - lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; lv_ptr->lv_block_exception = NULL; + lv_ptr->lv_iobuf = NULL; + lv_ptr->lv_snapshot_hash_table = NULL; + lv_ptr->lv_snapshot_hash_table_size = 0; + lv_ptr->lv_snapshot_hash_mask = 0; + lv_ptr->lv_COW_table_page = NULL; init_MUTEX(&lv_ptr->lv_snapshot_sem); + lv_ptr->lv_snapshot_use_rate = 0; vg_ptr->lv[l] = lv_ptr; /* get the PE structures from user space if this @@ -2032,7 +2367,7 @@ vg[VG_CHR(minor)]->lv[l] = NULL; return -EFAULT; } - /* get pointer to original logical volume */ + /* point to the original logical volume */ lv_ptr = lv_ptr->lv_snapshot_org; lv_ptr->lv_snapshot_minor = 0; @@ -2043,7 +2378,8 @@ lv_ptr = lv_ptr->lv_snapshot_next; /* now lv_ptr points to the last existing snapshot in the chain */ vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr; - /* our new one now back points to the previous last in the chain */ + /* our new one now back points to the previous last in the chain + which can be the original logical volume */ lv_ptr = vg_ptr->lv[l]; /* now lv_ptr points to our new last snapshot logical volume */ lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; @@ -2054,16 +2390,19 @@ lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + if ((ret = lvm_snapshot_alloc(lv_ptr)) != 0) { - int err = 
lvm_snapshot_alloc(lv_ptr); - if (err) - { - vfree(lv_ptr->lv_block_exception); - kfree(lv_ptr); - vg[VG_CHR(minor)]->lv[l] = NULL; - return err; - } + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return ret; } + for ( e = 0; e < lv_ptr->lv_remap_ptr; e++) + lvm_hash_link (lv_ptr->lv_block_exception + e, lv_ptr->lv_block_exception[e].rdev_org, lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); + /* need to fill the COW exception table data + into the page for disk i/o */ + lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr); + init_waitqueue_head(&lv_ptr->lv_snapshot_wait); } else { vfree(lv_ptr->lv_block_exception); kfree(lv_ptr); @@ -2083,12 +2422,15 @@ lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number; vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); vg_ptr->lv_cur++; lv_ptr->lv_status = lv_status_save; - strtok(lv->lv_name, "/"); /* /dev */ +#ifdef CONFIG_DEVFS_FS + { + char *lv_tmp, *lv_buf = NULL; + strtok(lv->lv_name, "/"); /* /dev */ while((lv_tmp = strtok(NULL, "/")) != NULL) lv_buf = lv_tmp; @@ -2097,15 +2439,43 @@ DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number, S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, &lvm_blk_dops, NULL); + } +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif /* optionally add our new snapshot LV */ if (lv_ptr->lv_access & LV_SNAPSHOT) { /* sync the original logical volume */ fsync_dev(lv_ptr->lv_snapshot_org->lv_dev); +#ifdef LVM_VFS_ENHANCEMENT + /* VFS function call to sync and lock the filesystem */ + fsync_dev_lockfs(lv_ptr->lv_snapshot_org->lv_dev); +#endif + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* put ourselve into the chain */ 
lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; - lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; } + + /* activate the logical volume */ + lv_ptr->lv_status |= LV_ACTIVE; + if ( lv_ptr->lv_access & LV_WRITE) + set_device_ro(lv_ptr->lv_dev, 0); + else + set_device_ro(lv_ptr->lv_dev, 1); + +#ifdef LVM_VFS_ENHANCEMENT +/* VFS function call to unlock the filesystem */ + if (lv_ptr->lv_access & LV_SNAPSHOT) { + unlockfs(lv_ptr->lv_snapshot_org->lv_dev); + } +#endif + + lv_ptr->vg = vg_ptr; + return 0; } /* lvm_do_lv_create() */ @@ -2176,7 +2546,7 @@ } } vfree(lv_ptr->lv_current_pe); - /* LV_SNAPSHOT */ + /* LV_SNAPSHOT */ } else { /* remove this snapshot logical volume from the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; @@ -2190,7 +2560,13 @@ lvm_snapshot_release(lv_ptr); } +#ifdef CONFIG_DEVFS_FS devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]); +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif #ifdef DEBUG_KFREE printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); @@ -2207,8 +2583,7 @@ */ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) { - int l, le, p, size, old_allocated_le; - uint32_t end, lv_status_save; + ulong end, l, le, p, size, old_allocated_le; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr; pe_t *pe; @@ -2224,12 +2599,75 @@ lv_ptr = vg_ptr->lv[l]; /* check for active snapshot */ - if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) return -EPERM; + if (lv->lv_access & LV_SNAPSHOT) + { + ulong e; + lv_block_exception_t *lvbe, *lvbe_old; + struct list_head * lvs_hash_table_old; + + if (lv->lv_block_exception == NULL) return -ENXIO; + size = lv->lv_remap_end * sizeof ( lv_block_exception_t); + if ((lvbe = vmalloc(size)) == NULL) + { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION " + "of %lu Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + 
if (lv->lv_remap_end > lv_ptr->lv_remap_end) + { + if (copy_from_user(lvbe, lv->lv_block_exception, size)) + { + vfree(lvbe); + return -EFAULT; + } + } + + lvbe_old = lv_ptr->lv_block_exception; + lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table; + /* we need to play on the safe side here... */ + down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + if (lv_ptr->lv_block_exception == NULL || + lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end) + { + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + vfree(lvbe); + return -EPERM; + } + memcpy(lvbe, + lv_ptr->lv_block_exception, + (lv->lv_remap_end > lv_ptr->lv_remap_end ? lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t)); + + lv_ptr->lv_block_exception = lvbe; + lv_ptr->lv_remap_end = lv->lv_remap_end; + if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0) + { + lvm_drop_snapshot(lv_ptr, "hash_alloc"); + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + vfree(lvbe_old); + vfree(lvs_hash_table_old); + return 1; + } + + for (e = 0; e < lv_ptr->lv_remap_ptr; e++) + lvm_hash_link (lv_ptr->lv_block_exception + e, lv_ptr->lv_block_exception[e].rdev_org, lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); + + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + + vfree(lvbe_old); + vfree(lvs_hash_table_old); + + return 0; + } + + + /* we drop in here in case it is an original logical volume */ if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) { printk(KERN_CRIT "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " - "of %d Byte at line %d\n", + "of %lu Byte at line %d\n", lvm_name, size, __LINE__); return -ENOMEM; } @@ -2248,11 +2686,6 @@ vg_ptr->vg_name); #endif - lv_ptr->lv_status |= LV_SPINDOWN; - fsync_dev(lv_ptr->lv_dev); - lv_ptr->lv_status &= ~LV_ACTIVE; - invalidate_buffers(lv_ptr->lv_dev); - /* reduce allocation counters on PV(s) */ for (le = 0; le < lv_ptr->lv_allocated_le; le++) { vg_ptr->pe_allocated--; @@ -2270,19 +2703,29 @@ pep1 = lv_ptr->lv_current_pe; end = 
lv_ptr->lv_current_le; - /* save open counter */ - lv_open = lv_ptr->lv_open; + /* save open counter... */ + lv->lv_open = lv_ptr->lv_open; + lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; + lv->lv_snapshot_next = lv_ptr->lv_snapshot_next; + lv->lv_snapshot_org = lv_ptr->lv_snapshot_org; + + lv->lv_current_pe = pe; /* save # of old allocated logical extents */ old_allocated_le = lv_ptr->lv_allocated_le; + /* in case of shrinking -> let's flush */ + if ( end > lv->lv_current_le) fsync_dev(lv_ptr->lv_dev); + /* copy preloaded LV */ - lv_status_save = lv->lv_status; - lv->lv_status |= LV_SPINDOWN; - lv->lv_status &= ~LV_ACTIVE; memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); - lv_ptr->lv_current_pe = pe; - lv_ptr->lv_open = lv_open; + + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); /* save availiable i/o statistic data */ /* linear logical volume */ @@ -2290,8 +2733,8 @@ /* Check what last LE shall be used */ if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le; for (le = 0; le < end; le++) { - lv_ptr->lv_current_pe[le].reads = pep1[le].reads; - lv_ptr->lv_current_pe[le].writes = pep1[le].writes; + lv_ptr->lv_current_pe[le].reads += pep1[le].reads; + lv_ptr->lv_current_pe[le].writes += pep1[le].writes; } /* striped logical volume */ } else { @@ -2304,38 +2747,44 @@ for (i = source = dest = 0; i < lv_ptr->lv_stripes; i++) { for (j = 0; j < end; j++) { - lv_ptr->lv_current_pe[dest + j].reads = + lv_ptr->lv_current_pe[dest + j].reads += pep1[source + j].reads; - lv_ptr->lv_current_pe[dest + j].writes = + lv_ptr->lv_current_pe[dest + j].writes += pep1[source + j].writes; } source += old_stripe_size; dest += new_stripe_size; } } - vfree(pep1); - pep1 = NULL; - /* extend the PE count in PVs */ for (le = 0; le < 
lv_ptr->lv_allocated_le; le++) { vg_ptr->pe_allocated++; for (p = 0; p < vg_ptr->pv_cur; p++) { if (vg_ptr->pv[p]->pv_dev == - vg_ptr->lv[l]->lv_current_pe[le].dev) { + lv_ptr->lv_current_pe[le].dev) { vg_ptr->pv[p]->pe_allocated++; break; } } } - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; - /* vg_lv_map array doesn't have to be changed here */ + vfree ( pep1); + pep1 = NULL; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); - lv_ptr->lv_status = lv_status_save; + if (lv->lv_access & LV_SNAPSHOT_ORG) + { + /* Correct the snapshot size information */ + while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL) + { + lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + } + } return 0; } /* lvm_do_lv_extend_reduce() */ @@ -2425,6 +2874,65 @@ /* + * character device support function logical volume status by device number + */ +static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { + int l; + lv_status_bydev_req_t lv_status_bydev_req; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&lv_status_bydev_req, arg, + sizeof(lv_status_bydev_req)) != 0) + return -EFAULT; + + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( vg_ptr->lv[l] == NULL) continue; + if ( vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev) break; + } + + if ( l == vg_ptr->lv_max) return -ENXIO; + + if (copy_to_user(lv_status_bydev_req.lv, + vg_ptr->lv[l], sizeof(lv_t)) != 0) + return -EFAULT; + + return 0; +} /* lvm_do_lv_status_bydev() */ + + +/* + * character device support function 
rename a logical volume + */ +static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv) +{ + int l = 0; + int ret = 0; + lv_t *lv_ptr = NULL; + + for (l = 0; l < vg_ptr->lv_max; l++) + { + if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue; + if (lv_ptr->lv_dev == lv->lv_dev) + { +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif + strncpy(lv_ptr->lv_name, lv_req->lv_name, + NAME_LEN - 1); + lv_ptr->lv_name[NAME_LEN - 1] = '\0'; /* strncpy() does not terminate on truncation; strrchr() below needs it */ +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif + break; + } + } + if (l == vg_ptr->lv_max) ret = -ENODEV; + + return ret; +} /* lvm_do_lv_rename */ + + +/* + * character device support function physical volume change */ static int lvm_do_pv_change(vg_t *vg_ptr, void *arg) @@ -2494,6 +3002,140 @@ } /* lvm_do_pv_status() */ + +/* + * create a /proc entry for a logical volume + */ +inline void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { + char *basename; + + if ( vg_ptr->lv_subdir_pde != NULL) { + basename = strrchr(lv_ptr->lv_name, '/'); + if (basename == NULL) basename = lv_ptr->lv_name; + else basename++; + pde = create_proc_entry(basename, S_IFREG, + vg_ptr->lv_subdir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_lv_info; + pde->data = lv_ptr; + } + } +} + + +/* + * remove a /proc entry for a logical volume + */ +inline void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { + char *basename; + + if ( vg_ptr->lv_subdir_pde != NULL) { + basename = strrchr(lv_ptr->lv_name, '/'); + if (basename == NULL) basename = lv_ptr->lv_name; + else basename++; + remove_proc_entry(basename, vg_ptr->lv_subdir_pde); + } +} + + +/* + * create a /proc entry for a physical volume + */ +inline void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { + char *basename; + + basename = strrchr(pv_ptr->pv_name, '/'); + if (basename == NULL) basename = pv_ptr->pv_name; + else basename++; + pde =
create_proc_entry(basename, S_IFREG, vg_ptr->pv_subdir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_pv_info; + pde->data = pv_ptr; + } +} + + +/* + * remove a /proc entry for a physical volume + */ +inline void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { + char *basename; + + basename = strrchr(pv_ptr->pv_name, '/'); + if ( vg_ptr->pv_subdir_pde != NULL) { + basename = strrchr(pv_ptr->pv_name, '/'); + if (basename == NULL) basename = pv_ptr->pv_name; + else basename++; + remove_proc_entry(basename, vg_ptr->pv_subdir_pde); + } +} + + +/* + * create a /proc entry for a volume group + */ +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) { + int l, p; + pv_t *pv_ptr; + lv_t *lv_ptr; + + pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, + lvm_proc_vg_subdir); + if ( pde != NULL) { + vg_ptr->vg_dir_pde = pde; + pde = create_proc_entry("group", S_IFREG, + vg_ptr->vg_dir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_vg_info; + pde->data = vg_ptr; + } + vg_ptr->lv_subdir_pde = + create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, + vg_ptr->vg_dir_pde); + vg_ptr->pv_subdir_pde = + create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, + vg_ptr->vg_dir_pde); + } + + if ( vg_ptr->pv_subdir_pde != NULL) { + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); + } + for ( p = 0; p < vg_ptr->pv_max; p++) { + if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; + lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); + } + } +} + +/* + * remove a /proc entry for a volume group + */ +void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) { + int l, p; + lv_t *lv_ptr; + pv_t *pv_ptr; + + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; + lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]); + } + for ( p = 0; p < vg_ptr->pv_max; p++) { + if ( ( pv_ptr = vg_ptr->pv[p]) == 
NULL) continue; + lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]); + } + if ( vg_ptr->vg_dir_pde != NULL) { + remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); + remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); + remove_proc_entry("group", vg_ptr->vg_dir_pde); + remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); + } +} +#endif + + /* * support function initialize gendisk variables */ @@ -2516,8 +3158,9 @@ lvm_blocksizes[i] = BLOCK_SIZE; } - blksize_size[MAJOR_NR] = lvm_blocksizes; blk_size[MAJOR_NR] = lvm_size; + blksize_size[MAJOR_NR] = lvm_blocksizes; + hardsect_size[MAJOR_NR] = lvm_blocksizes; return; } /* lvm_gen_init() */ @@ -2533,17 +3176,8 @@ * * Is this the real thing? * - * No, it's bollocks. md.c tries to do a bit different thing that might - * _somewhat_ work eons ago. Neither does any good these days. mount() couldn't - * care less for icache (it cares only for ->s_root->d_count and if we want - * loopback mounts even that will stop). BTW, with the form used here mount() - * would have to scan the _whole_ icache to detect the attempt - how on the - * Earth could it guess the i_ino of your dummy inode? Official line on the - * exclusion between mount()/swapon()/open()/etc. is Just Don't Do It(tm). - * If you can convince Linus that it's worth changing - fine, then you'll need - * to do blkdev_get()/blkdev_put(). Until then... 
*/ -struct inode *lvm_get_inode(kdev_t dev) +struct inode *lvm_get_inode(int dev) { struct inode *inode_this = NULL; diff -urN 2.4.0-test13-pre3/include/linux/lvm.h 2.4.0-test13-pre3-lvm/include/linux/lvm.h --- 2.4.0-test13-pre3/include/linux/lvm.h Sat Dec 16 15:26:18 2000 +++ 2.4.0-test13-pre3-lvm/include/linux/lvm.h Mon Dec 18 19:40:48 2000 @@ -1,12 +1,14 @@ /* + * include/linux/lvm.h * kernel/lvm.h + * tools/lib/lvm.h * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software * * February-November 1997 * May-July 1998 * January-March,July,September,October,Dezember 1999 - * January 2000 + * January,February,July,November 2000 * * lvm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,6 +51,12 @@ * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit * 01/01/2000 - extended lv_v2 core structure by wait_queue member * 12/02/2000 - integrated Andrea Arcagnelli's snapshot work + * 18/02/2000 - seperated user and kernel space parts by + * #ifdef them with __KERNEL__ + * 08/03/2000 - implemented cluster/shared bits for vg_access + * 26/06/2000 - implemented snapshot persistency and resizing support + * 02/11/2000 - added hash table size member to lv structure + * 12/11/2000 - removed unneeded timestamp definitions * */ @@ -56,7 +64,10 @@ #ifndef _LVM_H_INCLUDE #define _LVM_H_INCLUDE -#define _LVM_H_VERSION "LVM 0.8final (15/2/2000)" +#define _LVM_KERNEL_H_VERSION "LVM 0.9 (13/11/2000)" + +#include +#include /* * preprocessor definitions @@ -64,8 +75,9 @@ /* if you like emergency reset code in the driver */ #define LVM_TOTAL_RESET +#ifdef __KERNEL__ #define LVM_GET_INODE -#undef LVM_HD_NAME +#undef LVM_HD_NAME /* display nice names in /proc/partitions */ /* lots of debugging output (see driver source) #define DEBUG_LVM_GET_INFO @@ -79,37 +91,50 @@ #define DEBUG_LVM_BLK_OPEN #define DEBUG_KFREE */ - 
-#include +#endif /* #ifdef __KERNEL__ */ #ifndef __KERNEL__ -#define ____NOT_KERNEL____ #define __KERNEL__ -#endif #include -#ifdef ____NOT_KERNEL____ -#undef ____NOT_KERNEL____ +#include #undef __KERNEL__ -#endif +#else +#include +#include +#endif /* #ifndef __KERNEL__ */ +#include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION ( 2, 3 ,0) +#ifdef __KERNEL__ #include -#else -#include -#endif - #include +#endif /* #ifdef __KERNEL__ */ + #include #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) #error Bad include/linux/major.h - LVM MAJOR undefined #endif +#ifdef BLOCK_SIZE +#undef BLOCK_SIZE +#endif + +#ifdef CONFIG_ARCH_S390 +#define BLOCK_SIZE 4096 +#else +#define BLOCK_SIZE 1024 +#endif + +#ifndef SECTOR_SIZE +#define SECTOR_SIZE 512 +#endif #define LVM_STRUCT_VERSION 1 /* structure version */ +#define LVM_DIR_PREFIX "/dev/" + #ifndef min #define min(a,b) (((a)<(b))?(a):(b)) #endif @@ -119,26 +144,33 @@ /* set the default structure version */ #if ( LVM_STRUCT_VERSION == 1) -#define pv_t pv_v1_t -#define lv_t lv_v2_t -#define vg_t vg_v1_t -#define pv_disk_t pv_disk_v1_t -#define lv_disk_t lv_disk_v1_t -#define vg_disk_t vg_disk_v1_t -#define lv_exception_t lv_v2_exception_t +#define pv_t pv_v2_t +#define lv_t lv_v4_t +#define vg_t vg_v3_t +#define pv_disk_t pv_disk_v2_t +#define lv_disk_t lv_disk_v3_t +#define vg_disk_t vg_disk_v2_t +#define lv_block_exception_t lv_block_exception_v1_t +#define lv_COW_table_disk_t lv_COW_table_disk_v1_t #endif + /* - * i/o protocoll version + * i/o protocol version * * defined here for the driver and defined seperate in the - * user land LVM parts + * user land tools/lib/liblvm.h * */ -#define LVM_DRIVER_IOP_VERSION 6 +#define LVM_DRIVER_IOP_VERSION 10 #define LVM_NAME "lvm" +#define LVM_GLOBAL "global" +#define LVM_DIR "lvm" +#define LVM_VG_SUBDIR "VGs" +#define LVM_LV_SUBDIR "LVs" +#define LVM_PV_SUBDIR "PVs" /* * VG/LV indexing macros @@ -216,11 +248,12 @@ #define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for 
timekeeping */ /* name list of physical volumes on disk */ -#define LVM_PV_NAMELIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ +#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ LVM_TIMESTAMP_DISK_SIZE) /* now for the dynamically calculated parts of the VGDA */ -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + sizeof ( lv_t) * b) +#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \ + sizeof ( lv_disk_t) * b) #define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ (pv)->pe_on_disk.size) #define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ @@ -250,21 +283,21 @@ * Should be a sufficient spectrum ;*) */ -/* This is the usable size of disk_pe_t.le_num !!! v v */ +/* This is the usable size of pe_disk_t.le_num !!! v v */ #define LVM_PE_T_MAX ( ( 1 << ( sizeof ( uint16_t) * 8)) - 2) -#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 2*1024*1024*1024 ? ( long long) 2*1024*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) -#define LVM_MIN_PE_SIZE ( 8L * 2) /* 8 KB in sectors */ -#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L * 2) /* 16GB in sectors */ -#define LVM_DEFAULT_PE_SIZE ( 4096L * 2) /* 4 MB in sectors */ +#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 1024*1024/SECTOR_SIZE*1024*1024 ? 
( long long) 1024*1024/SECTOR_SIZE*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) +#define LVM_MIN_PE_SIZE ( 8192L / SECTOR_SIZE) /* 8 KB in sectors */ +#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L / SECTOR_SIZE * 1024) /* 16GB in sectors */ +#define LVM_DEFAULT_PE_SIZE ( 4096L * 1024 / SECTOR_SIZE) /* 4 MB in sectors */ #define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ -#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE>>9) /* PAGESIZE in sectors */ -#define LVM_MAX_STRIPE_SIZE ( 512L * 2) /* 512 KB in sectors */ +#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE>>9) /* PAGESIZE in sectors */ +#define LVM_MAX_STRIPE_SIZE ( 512L * 1024 / SECTOR_SIZE) /* 512 KB in sectors */ #define LVM_MAX_STRIPES 128 /* max # of stripes */ -#define LVM_MAX_SIZE ( 1024LU * 1024 * 1024 * 2) /* 1TB[sectors] */ +#define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ #define LVM_MAX_MIRRORS 2 /* future use */ -#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ -#define LVM_MAX_READ_AHEAD 256 /* maximum read ahead sectors */ +#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ #define LVM_PARTITION 0xfe /* LVM partition id */ #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ @@ -279,6 +312,64 @@ #define TRUE 1 +#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \ + vg->pe_size / lv->lv_chunk_size) + +#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \ +{ \ + int COW_table_entries_per_PE; \ + int COW_table_chunks_per_PE; \ +\ + COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \ + COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \ + COW_table_entries_per_PE - COW_table_chunks_per_PE;}) + + +/* to disk and to core data conversion macros */ +#if __BYTE_ORDER == __BIG_ENDIAN + +#define 
LVM_TO_CORE16(x) ( \ + ((uint16_t)((((uint16_t)(x) & 0x00FFU) << 8) | \ + (((uint16_t)(x) & 0xFF00U) >> 8)))) + +#define LVM_TO_DISK16(x) LVM_TO_CORE16(x) + +#define LVM_TO_CORE32(x) ( \ + ((uint32_t)((((uint32_t)(x) & 0x000000FFU) << 24) | \ + (((uint32_t)(x) & 0x0000FF00U) << 8) | \ + (((uint32_t)(x) & 0x00FF0000U) >> 8) | \ + (((uint32_t)(x) & 0xFF000000U) >> 24)))) + +#define LVM_TO_DISK32(x) LVM_TO_CORE32(x) + +#define LVM_TO_CORE64(x) \ + ((uint64_t)((((uint64_t)(x) & 0x00000000000000FFULL) << 56) | \ + (((uint64_t)(x) & 0x000000000000FF00ULL) << 40) | \ + (((uint64_t)(x) & 0x0000000000FF0000ULL) << 24) | \ + (((uint64_t)(x) & 0x00000000FF000000ULL) << 8) | \ + (((uint64_t)(x) & 0x000000FF00000000ULL) >> 8) | \ + (((uint64_t)(x) & 0x0000FF0000000000ULL) >> 24) | \ + (((uint64_t)(x) & 0x00FF000000000000ULL) >> 40) | \ + (((uint64_t)(x) & 0xFF00000000000000ULL) >> 56))) + +#define LVM_TO_DISK64(x) LVM_TO_CORE64(x) + +#elif __BYTE_ORDER == __LITTLE_ENDIAN + +#define LVM_TO_CORE16(x) (x) +#define LVM_TO_DISK16(x) (x) +#define LVM_TO_CORE32(x) (x) +#define LVM_TO_DISK32(x) (x) +#define LVM_TO_CORE64(x) (x) +#define LVM_TO_DISK64(x) (x) + +#else + +#error "__BYTE_ORDER must be defined as __LITTLE_ENDIAN or __BIG_ENDIAN" + +#endif /* #if __BYTE_ORDER == __BIG_ENDIAN */ + + /* * ioctls */ @@ -294,6 +385,7 @@ #define VG_STATUS_GET_NAMELIST _IOWR ( 0xfe, 0x07, 1) #define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) +#define VG_RENAME _IOW ( 0xfe, 0x09, 1) /* logical volume */ @@ -315,6 +407,14 @@ #define LE_REMAP _IOW ( 0xfe, 0x2b, 1) +#define LV_SNAPSHOT_USE_RATE _IOWR ( 0xfe, 0x2c, 1) + +#define LV_STATUS_BYDEV _IOWR ( 0xfe, 0x2e, 1) + +#define LV_RENAME _IOW ( 0xfe, 0x2f, 1) + +#define LV_BMAP _IOWR ( 0xfe, 0x30, 1) + /* physical volume */ #define PV_STATUS _IOWR ( 0xfe, 0x40, 1) @@ -347,6 +447,8 @@ #define VG_READ 0x01 /* vg_access */ #define VG_WRITE 0x02 /* " */ +#define VG_CLUSTERED 0x04 /* " */ +#define VG_SHARED 0x08 /* " */ /* logical volume */ #define LV_ACTIVE 0x01 /*
lv_status */ @@ -376,34 +478,36 @@ */ #define NAME_LEN 128 /* don't change!!! */ -#define UUID_LEN 16 /* don't change!!! */ - -/* remap physical sector/rdev pairs */ -typedef struct -{ - struct list_head hash; - ulong rsector_org; - kdev_t rdev_org; - ulong rsector_new; - kdev_t rdev_new; -} lv_block_exception_t; +#define UUID_LEN 32 /* don't change!!! */ +/* copy on write tables in disk format */ +typedef struct { + uint64_t pv_org_number; + uint64_t pv_org_rsector; + uint64_t pv_snap_number; + uint64_t pv_snap_rsector; +} lv_COW_table_disk_v1_t; + +/* remap physical sector/rdev pairs including hash */ +typedef struct { + struct list_head hash; + ulong rsector_org; + kdev_t rdev_org; + ulong rsector_new; + kdev_t rdev_new; +} lv_block_exception_v1_t; /* disk stored pe information */ -typedef struct - { - uint16_t lv_num; - uint16_t le_num; - } -disk_pe_t; +typedef struct { + uint16_t lv_num; + uint16_t le_num; +} pe_disk_t; /* disk stored PV, VG, LV and PE size and offset information */ -typedef struct - { - uint32_t base; - uint32_t size; - } -lvm_disk_data_t; +typedef struct { + uint32_t base; + uint32_t size; +} lvm_disk_data_t; /* @@ -411,95 +515,104 @@ */ /* core */ -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - uint32_t pe_stale; /* for future use */ - - disk_pe_t *pe; /* HM */ - struct inode *inode; /* HM */ - } -pv_v1_t; +typedef struct { + char id[2]; /* Identifier */ + unsigned short version; /* HM lvm version */ + lvm_disk_data_t 
pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + char pv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + char system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint pv_number; + uint pv_status; + uint pv_allocatable; + uint pv_size; /* HM */ + uint lv_cur; + uint pe_size; + uint pe_total; + uint pe_allocated; + uint pe_stale; /* for future use */ + pe_disk_t *pe; /* HM */ + struct inode *inode; /* HM */ +} pv_v1_t; -/* disk */ -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - uint32_t pv_major; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - } -pv_disk_v1_t; +/* core */ +typedef struct { + char id[2]; /* Identifier */ + unsigned short version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuidlist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + char pv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + char system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint pv_number; + uint pv_status; + uint pv_allocatable; + uint pv_size; /* HM */ + uint lv_cur; + uint pe_size; + uint pe_total; + uint pe_allocated; + uint pe_stale; /* for future use */ + pe_disk_t *pe; /* HM */ + struct inode *inode; /* HM */ + char pv_uuid[UUID_LEN+1]; +} pv_v2_t; -/* - * Structure Physical Volume (PV) Version 2 (future!) 
- */ - -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_uuid_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - uint32_t pe_stale; /* for future use */ - disk_pe_t *pe; /* HM */ - struct inode *inode; /* HM */ - /* delta to version 1 starts here */ - uint8_t pv_uuid[UUID_LEN]; - uint32_t pv_atime; /* PV access time */ - uint32_t pv_ctime; /* PV creation time */ - uint32_t pv_mtime; /* PV modification time */ - } -pv_v2_t; +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; +} pv_disk_v1_t; + +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuidlist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_uuid[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ 
+ uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; +} pv_disk_v2_t; /* @@ -507,325 +620,256 @@ */ /* core PE information */ -typedef struct - { - kdev_t dev; - uint32_t pe; /* to be changed if > 2TB */ - uint32_t reads; - uint32_t writes; - } -pe_t; - -typedef struct - { - uint8_t lv_name[NAME_LEN]; - kdev_t old_dev; - kdev_t new_dev; - ulong old_pe; - ulong new_pe; - } -le_remap_req_t; - - +typedef struct { + kdev_t dev; + ulong pe; /* to be changed if > 2TB */ + ulong reads; + ulong writes; +} pe_t; + +typedef struct { + char lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + ulong old_pe; + ulong new_pe; +} le_remap_req_t; + +typedef struct lv_bmap { + ulong lv_block; + dev_t lv_dev; +} lv_bmap_t; /* - * Structure Logical Volume (LV) Version 1 - */ - -/* disk */ -typedef struct - { - uint8_t lv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - uint32_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - uint32_t dummy; - uint32_t lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; /* HM, for future use */ - } -lv_disk_v1_t; - - -/* - * Structure Logical Volume (LV) Version 2 + * Structure Logical Volume (LV) Version 3 */ /* core */ -typedef struct lv_v2 - { - uint8_t lv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - kdev_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - pe_t *lv_current_pe; /* HM */ - uint32_t 
lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; - - /* delta to version 1 starts here */ - struct lv_v2 *lv_snapshot_org; - struct lv_v2 *lv_snapshot_prev; - struct lv_v2 *lv_snapshot_next; - lv_block_exception_t *lv_block_exception; - uint8_t __unused; - uint32_t lv_remap_ptr; - uint32_t lv_remap_end; - uint32_t lv_chunk_size; - uint32_t lv_snapshot_minor; - struct kiobuf * lv_iobuf; - struct semaphore lv_snapshot_sem; - struct list_head * lv_snapshot_hash_table; - unsigned long lv_snapshot_hash_mask; -} lv_v2_t; +typedef struct lv_v4 { + char lv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + uint lv_access; + uint lv_status; + uint lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint lv_number; /* HM */ + uint lv_mirror_copies; /* for future use */ + uint lv_recovery; /* " */ + uint lv_schedule; /* " */ + uint lv_size; + pe_t *lv_current_pe; /* HM */ + uint lv_current_le; /* for future use */ + uint lv_allocated_le; + uint lv_stripes; + uint lv_stripesize; + uint lv_badblock; /* for future use */ + uint lv_allocation; + uint lv_io_timeout; /* for future use */ + uint lv_read_ahead; + + /* delta to version 1 starts here */ + struct lv_v4 *lv_snapshot_org; + struct lv_v4 *lv_snapshot_prev; + struct lv_v4 *lv_snapshot_next; + lv_block_exception_t *lv_block_exception; + uint lv_remap_ptr; + uint lv_remap_end; + uint lv_chunk_size; + uint lv_snapshot_minor; +#ifdef __KERNEL__ + struct kiobuf *lv_iobuf; + struct semaphore lv_snapshot_sem; + struct list_head *lv_snapshot_hash_table; + ulong lv_snapshot_hash_table_size; + ulong lv_snapshot_hash_mask; + struct page *lv_COW_table_page; + wait_queue_head_t lv_snapshot_wait; + int lv_snapshot_use_rate; + void *vg; +#else + char dummy[200]; +#endif +} lv_v4_t; /* disk */ -typedef struct - { - uint8_t lv_name[NAME_LEN]; - 
uint8_t vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - uint32_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - uint32_t dummy; - uint32_t lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; /* HM, for future use */ - } -lv_disk_v2_t; - +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t lv_snapshot_minor;/* minor number of original */ + uint16_t lv_chunk_size; /* chunk size of snapshot */ + uint16_t dummy; + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM */ +} lv_disk_v3_t; /* * Structure Volume Group (VG) Version 1 */ -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; /* was obsolete max_pe_per_pv */ - uint32_t vgda; /* volume group descriptor arrays FU */ 
- uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ - } -vg_v1_t; - -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - } -vg_disk_v1_t; - -/* - * Structure Volume Group (VG) Version 2 - */ - -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* future: active physical volumes */ - uint32_t max_pe_per_pv; /* OBSOLETE maximum PE/PV */ - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents 
*/ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ - /* delta to version 1 starts here */ - uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ - time_t vg_atime; /* VG access time */ - time_t vg_ctime; /* VG creation time */ - time_t vg_mtime; /* VG modification time */ - } -vg_v2_t; - - -/* - * Timekeeping structure on disk (0.7 feature) - * - * Holds several timestamps for start/stop time of non - * atomic VGDA disk i/o operations - * - */ - -typedef struct - { - uint32_t seconds; /* seconds since the epoch */ - uint32_t jiffies; /* micro timer */ - } -lvm_time_t; - -#define TIMESTAMP_ID_SIZE 2 -typedef struct - { - uint8_t id[TIMESTAMP_ID_SIZE]; /* Identifier */ - lvm_time_t pv_vg_lv_pe_io_begin; - lvm_time_t pv_vg_lv_pe_io_end; - lvm_time_t pv_io_begin; - lvm_time_t pv_io_end; - lvm_time_t vg_io_begin; - lvm_time_t vg_io_end; - lvm_time_t lv_io_begin; - lvm_time_t lv_io_end; - lvm_time_t pe_io_begin; - lvm_time_t pe_io_end; - lvm_time_t pe_move_io_begin; - lvm_time_t pe_move_io_end; - uint8_t dummy[LVM_TIMESTAMP_DISK_SIZE - - TIMESTAMP_ID_SIZE - - 12 * sizeof (lvm_time_t)]; - /* ATTENTION ^^ */ - } -timestamp_disk_t; - -/* same on disk and in core so far */ -typedef timestamp_disk_t timestamp_t; - -/* function identifiers for timestamp actions */ -typedef enum - { - PV_VG_LV_PE_IO_BEGIN, - PV_VG_LV_PE_IO_END, - PV_IO_BEGIN, - PV_IO_END, - VG_IO_BEGIN, - VG_IO_END, - LV_IO_BEGIN, - LV_IO_END, - PE_IO_BEGIN, - PE_IO_END, - PE_MOVE_IO_BEGIN, - PE_MOVE_IO_END - } -ts_fct_id_t; +/* core */ +typedef struct { + char vg_name[NAME_LEN]; /* volume group name */ + uint vg_number; /* volume group number */ + uint vg_access; /* read/write */ + uint vg_status; /* active or not */ + uint lv_max; /* maximum logical volumes */ + uint lv_cur; /* current 
logical volumes */ + uint lv_open; /* open logical volumes */ + uint pv_max; /* maximum physical volumes */ + uint pv_cur; /* current physical volumes FU */ + uint pv_act; /* active physical volumes */ + uint dummy; /* was obsolete max_pe_per_pv */ + uint vgda; /* volume group descriptor arrays FU */ + uint pe_size; /* physical extent size in sectors */ + uint pe_total; /* total of physical extents */ + uint pe_allocated; /* allocated physical extents */ + uint pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ +} vg_v1_t; + +typedef struct { + char vg_name[NAME_LEN]; /* volume group name */ + uint vg_number; /* volume group number */ + uint vg_access; /* read/write */ + uint vg_status; /* active or not */ + uint lv_max; /* maximum logical volumes */ + uint lv_cur; /* current logical volumes */ + uint lv_open; /* open logical volumes */ + uint pv_max; /* maximum physical volumes */ + uint pv_cur; /* current physical volumes FU */ + uint pv_act; /* active physical volumes */ + uint dummy; /* was obsolete max_pe_per_pv */ + uint vgda; /* volume group descriptor arrays FU */ + uint pe_size; /* physical extent size in sectors */ + uint pe_total; /* total of physical extents */ + uint pe_allocated; /* allocated physical extents */ + uint pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ + char vg_uuid[UUID_LEN+1]; /* volume group UUID */ +#ifdef __KERNEL__ + struct proc_dir_entry *vg_dir_pde; + struct proc_dir_entry *lv_subdir_pde; + struct proc_dir_entry *pv_subdir_pde; +#else + char dummy1[200]; +#endif +} vg_v3_t; + + +/* disk */ +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t 
vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v1_t; + +typedef struct { + uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ + uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v2_t; /* * Request structures for ioctls */ -/* Request structure PV_STATUS */ -typedef struct - { - char pv_name[NAME_LEN]; - pv_t *pv; - } -pv_status_req_t, pv_change_req_t; +/* Request structure PV_STATUS_BY_NAME... 
*/ +typedef struct { + char pv_name[NAME_LEN]; + pv_t *pv; +} pv_status_req_t, pv_change_req_t; /* Request structure PV_FLUSH */ -typedef struct - { - char pv_name[NAME_LEN]; - } -pv_flush_req_t; +typedef struct { + char pv_name[NAME_LEN]; + kdev_t pv_dev; +} pv_flush_req_t; /* Request structure PE_MOVE */ -typedef struct - { - enum - { - LOCK_PE, UNLOCK_PE - } - lock; - struct - { - kdev_t lv_dev; - kdev_t pv_dev; - uint32_t pv_offset; - } - data; - } -pe_lock_req_t; +typedef struct { + enum { + LOCK_PE, UNLOCK_PE + } lock; + struct { + kdev_t lv_dev; + kdev_t pv_dev; + ulong pv_offset; + } data; +} pe_lock_req_t; /* Request structure LV_STATUS_BYNAME */ -typedef struct - { - char lv_name[NAME_LEN]; - lv_t *lv; - } -lv_status_byname_req_t, lv_req_t; +typedef struct { + char lv_name[NAME_LEN]; + lv_t *lv; +} lv_status_byname_req_t, lv_req_t; /* Request structure LV_STATUS_BYINDEX */ -typedef struct - { - ulong lv_index; - lv_t *lv; - } -lv_status_byindex_req_t; +typedef struct { + ulong lv_index; + lv_t *lv; + /* Transfer size because user space and kernel space differ */ + ushort size; +} lv_status_byindex_req_t; + +/* Request structure LV_STATUS_BYDEV... */ +typedef struct { + dev_t dev; + lv_t *lv; /* logical volume struct pointer, typed like the BYNAME/BYINDEX requests */ +} lv_status_bydev_req_t; + + +/* Request structure LV_SNAPSHOT_USE_RATE */ +typedef struct { + int block; + int rate; +} lv_snapshot_use_rate_req_t; -#endif /* #ifndef _LVM_H_INCLUDE */ +#endif /* #ifndef _LVM_H_INCLUDE */