/*
 * Patch summary (descriptive preamble placed ahead of the first diff header;
 * the diff text below is unchanged).
 *
 * Files touched: drivers/md/lvm.c and include/linux/lvm.h.
 *
 * 1. Per-LV locking: the lv_t member "struct semaphore lv_snapshot_sem" is
 *    replaced by "struct rw_semaphore lv_lock", and its initialisation
 *    changes from init_MUTEX() to init_rwsem().
 *
 * 2. lvm_map() now takes lv->lv_lock with down_read() instead of
 *    down(&lv->lv_snapshot_sem), and releases it with up_read() on the
 *    three exits visible in the hunks: the queued pe-request path
 *    (return 0), the "out" label (return 1) and the "bad" label (return -1).
 *
 * 3. Snapshot remapping is split in two:
 *      - _remap_snapshot() (new, static inline) calls
 *        lvm_snapshot_remap_block() under down_read()/up_read() and only
 *        calls __remap_snapshot() when that call returned 0.
 *      - __remap_snapshot() (no longer inline) takes the lock itself with
 *        down_write(), repeats the lvm_snapshot_remap_block() call, and
 *        only then runs lvm_snapshot_COW() / lvm_write_COW_table_block().
 *        Per the in-patch comment, the repeat closes the window between
 *        dropping the read lock and acquiring the write lock.
 *    lvm_map() calls _remap_snapshot() directly; the caller-side
 *    down()/up() on snap->lv_snapshot_sem is removed.
 *
 * 4. The down()/up() calls on _pe_lock inside lvm_map() are compiled out
 *    with "#if 0".  The in-patch comment gives I/O performance as the
 *    reason and states that pv_move is already racy in beta7.
 *    NOTE(review): as a consequence the _pe_requests list update in that
 *    hunk is no longer bracketed by _pe_lock within lvm_map().
 *
 * 5. In the hunk at -1364 the assignment "pe_lock_req = new_lock" is moved
 *    to after down(&_pe_lock), so it is now made with _pe_lock held
 *    (enclosing function not visible in the hunk).
 *
 * 6. The snapshot-chain insert and remove hunks and
 *    lvm_do_lv_extend_reduce() switch from down()/up() on lv_snapshot_sem
 *    to down_write()/up_write() on lv_lock, for both the origin LV and
 *    each snapshot LV updated in the loop.
 */
diff -urN lvm-ref/drivers/md/lvm.c lvm/drivers/md/lvm.c --- lvm-ref/drivers/md/lvm.c Wed Jul 18 17:11:13 2001 +++ lvm/drivers/md/lvm.c Wed Jul 18 17:15:00 2001 @@ -1111,11 +1111,33 @@ * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c * (see init_module/lvm_init) */ -static inline void __remap_snapshot(kdev_t rdev, ulong rsector, - ulong pe_start, lv_t *lv, vg_t *vg) { +static void __remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + + /* copy a chunk from the origin to a snapshot device */ + down_write(&lv->lv_lock); + + /* we must redo lvm_snapshot_remap_block in order to avoid a + race condition in the gap where no lock was held */ if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) && !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv)) lvm_write_COW_table_block(vg, lv); + + up_write(&lv->lv_lock); +} + +static inline void _remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + int r; + + /* check to see if this chunk is already in the snapshot */ + down_read(&lv->lv_lock); + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv); + up_read(&lv->lv_lock); + + if (!r) + /* we haven't yet copied this block to the snapshot */ + __remap_snapshot(rdev, rsector, pe_start, lv, vg); } static int lvm_map(struct buffer_head *bh, int rw) @@ -1131,7 +1153,7 @@ lv_t *lv = vg_this->lv[LV_BLK(minor)]; - down(&lv->lv_snapshot_sem); + down_read(&lv->lv_lock); if (!(lv->lv_status & LV_ACTIVE)) { printk(KERN_ALERT "%s - lvm_map: ll_rw_blk for inactive LV %s\n", @@ -1197,8 +1219,16 @@ rsector_map, stripe_length, stripe_index); } +#if 0 /* + * drop the semaphore to avoid hurting I/O performance, pv_move + * is racy anyways in beta7. You are required to avoid pv_moves + * with writes going on the LV in beta7 anyways. So this doesn't make + * any reliability difference and it fixes the performance + * showstopper during production. 
+ */ /* handle physical extents on the move */ down(&_pe_lock); +#endif if((pe_lock_req.lock == LOCK_PE) && (rdev_map == pe_lock_req.data.pv_dev) && (rsector_map >= pe_lock_req.data.pv_offset) && @@ -1209,17 +1239,23 @@ printk(KERN_ERR "%s -- bh uses low 2 bits of pointer\n", lvm_name); +#if 0 up(&_pe_lock); +#endif goto bad; } bh->b_reqnext = _pe_requests; _pe_requests = (struct buffer_head *) ((int) bh | rw); +#if 0 up(&_pe_lock); - up(&lv->lv_snapshot_sem); +#endif + up_read(&lv->lv_lock); return 0; } +#if 0 up(&_pe_lock); +#endif /* statistic */ if (rw == WRITE || rw == WRITEA) @@ -1252,22 +1288,20 @@ /* Serializes the COW with the accesses to the snapshot device */ - down(&snap->lv_snapshot_sem); - __remap_snapshot(rdev_map, rsector_map, - pe_start, snap, vg_this); - up(&snap->lv_snapshot_sem); + _remap_snapshot(rdev_map, rsector_map, + pe_start, snap, vg_this); } } out: bh->b_rdev = rdev_map; bh->b_rsector = rsector_map; - up(&lv->lv_snapshot_sem); + up_read(&lv->lv_lock); return 1; bad: buffer_IO_error(bh); - up(&lv->lv_snapshot_sem); + up_read(&lv->lv_lock); return -1; } /* lvm_map() */ @@ -1364,9 +1398,8 @@ } if (p == vg_ptr->pv_max) return -ENXIO; - pe_lock_req = new_lock; - down(&_pe_lock); + pe_lock_req = new_lock; pe_lock_req.lock = UNLOCK_PE; up(&_pe_lock); @@ -1865,7 +1898,7 @@ lv_ptr->lv_snapshot_hash_table = NULL; lv_ptr->lv_snapshot_hash_table_size = 0; lv_ptr->lv_snapshot_hash_mask = 0; - init_MUTEX(&lv_ptr->lv_snapshot_sem); + init_rwsem(&lv_ptr->lv_lock); lv_ptr->lv_snapshot_use_rate = 0; vg_ptr->lv[l] = lv_ptr; @@ -2007,7 +2040,7 @@ fsync_dev_lockfs(org->lv_dev); #endif - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); org->lv_access |= LV_SNAPSHOT_ORG; lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */ @@ -2016,7 +2049,7 @@ for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next); lv_ptr->lv_snapshot_prev = last; last->lv_snapshot_next = lv_ptr; - up(&org->lv_snapshot_sem); + 
up_write(&org->lv_lock); } /* activate the logical volume */ @@ -2085,7 +2118,7 @@ * to the original lv before playing with it. */ lv_t * org = lv_ptr->lv_snapshot_org; - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); /* remove this snapshot logical volume from the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; @@ -2098,7 +2131,7 @@ if (!org->lv_snapshot_next) { org->lv_access &= ~LV_SNAPSHOT_ORG; } - up(&org->lv_snapshot_sem); + up_write(&org->lv_lock); lvm_snapshot_release(lv_ptr); @@ -2295,7 +2328,7 @@ return r; /* copy relevent fields */ - down(&old_lv->lv_snapshot_sem); + down_write(&old_lv->lv_lock); if(new_lv->lv_access & LV_SNAPSHOT) { @@ -2337,7 +2370,7 @@ lv_t *snap; for(snap = old_lv->lv_snapshot_next; snap; snap = snap->lv_snapshot_next) { - down(&snap->lv_snapshot_sem); + down_write(&snap->lv_lock); snap->lv_current_pe = old_lv->lv_current_pe; snap->lv_allocated_le = old_lv->lv_allocated_le; @@ -2349,13 +2382,13 @@ lvm_size[MINOR(snap->lv_dev)] = old_lv->lv_size >> 1; __update_hardblocksize(snap); - up(&snap->lv_snapshot_sem); + up_write(&snap->lv_lock); } } } __update_hardblocksize(old_lv); - up(&old_lv->lv_snapshot_sem); + up_write(&old_lv->lv_lock); return 0; } /* lvm_do_lv_extend_reduce() */ diff -urN lvm-ref/include/linux/lvm.h lvm/include/linux/lvm.h --- lvm-ref/include/linux/lvm.h Wed Jul 18 17:11:13 2001 +++ lvm/include/linux/lvm.h Wed Jul 18 17:11:45 2001 @@ -650,7 +650,7 @@ #ifdef __KERNEL__ struct kiobuf *lv_iobuf; struct kiobuf *lv_COW_table_iobuf; - struct semaphore lv_snapshot_sem; + struct rw_semaphore lv_lock; struct list_head *lv_snapshot_hash_table; uint32_t lv_snapshot_hash_table_size; uint32_t lv_snapshot_hash_mask;