From: NeilBrown When an array is degraded, bit in the intent-bitmap are never cleared. So if a recently failed drive is re-added, we only need to reconstruct the block that are still reflected in the bitmap. This patch adds support for this re-adding. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton --- 25-akpm/drivers/md/md.c | 71 +++++++++++++++++++++++++++++--------- 25-akpm/drivers/md/raid1.c | 7 +++ 25-akpm/include/linux/raid/md_k.h | 4 ++ 3 files changed, 65 insertions(+), 17 deletions(-) diff -puN drivers/md/md.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive drivers/md/md.c --- 25/drivers/md/md.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive 2005-03-10 19:10:05.000000000 -0800 +++ 25-akpm/drivers/md/md.c 2005-03-10 19:10:05.000000000 -0800 @@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *md mdp_disk_t *desc; mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); + rdev->raid_disk = -1; + rdev->in_sync = 0; if (mddev->raid_disks == 0) { mddev->major_version = 0; mddev->minor_version = sb->minor_version; @@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *md memcpy(mddev->uuid+12,&sb->set_uuid3, 4); mddev->max_disks = MD_SB_DISKS; - } else { - __u64 ev1; - ev1 = md_event(sb); + } else if (mddev->pers == NULL) { + /* Insist on good event counter while assembling */ + __u64 ev1 = md_event(sb); ++ev1; if (ev1 < mddev->events) return -EINVAL; - } + } else if (mddev->bitmap) { + /* if adding to array with a bitmap, then we can accept an + * older device ... but not too old. + */ + __u64 ev1 = md_event(sb); + if (ev1 < mddev->bitmap->events_cleared) + return 0; + } else /* just a hot-add of a new device, leave raid_disk at -1 */ + return 0; + if (mddev->level != LEVEL_MULTIPATH) { - rdev->raid_disk = -1; - rdev->in_sync = rdev->faulty = 0; + rdev->faulty = 0; desc = sb->disks + rdev->desc_nr; if (desc->state & (1<in_sync = 1; rdev->raid_disk = desc->raid_disk; } - } + } else /* MULTIPATH are always insync */ + rdev->in_sync = 1; return 0; } @@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mdd { struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + rdev->raid_disk = -1; + rdev->in_sync = 0; if (mddev->raid_disks == 0) { mddev->major_version = 1; mddev->patch_version = 0; @@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mdd memcpy(mddev->uuid, sb->set_uuid, 16); mddev->max_disks = (4096-256)/2; - } else { - __u64 ev1; - ev1 = le64_to_cpu(sb->events); + } else if (mddev->pers == NULL) { + /* Insist of good event counter while assembling */ + __u64 ev1 = le64_to_cpu(sb->events); ++ev1; if (ev1 < mddev->events) return -EINVAL; - } + } else if (mddev->bitmap) { + /* If adding to array with a bitmap, then we can accept an + * older device, but not too old. + */ + __u64 ev1 = le64_to_cpu(sb->events); + if (ev1 < mddev->bitmap->events_cleared) + return 0; + } else /* just a hot-add of a new device, leave raid_disk at -1 */ + return 0; if (mddev->level != LEVEL_MULTIPATH) { int role; @@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mdd role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); switch(role) { case 0xffff: /* spare */ - rdev->in_sync = 0; rdev->faulty = 0; - rdev->raid_disk = -1; break; case 0xfffe: /* faulty */ - rdev->in_sync = 0; rdev->faulty = 1; - rdev->raid_disk = -1; break; default: rdev->in_sync = 1; @@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mdd rdev->raid_disk = role; break; } - } + } else /* MULTIPATH are always insync */ + rdev->in_sync = 1; + return 0; } @@ -2166,6 +2185,18 @@ static int add_new_disk(mddev_t * mddev, PTR_ERR(rdev)); return PTR_ERR(rdev); } + /* set save_raid_disk if appropriate */ + if (!mddev->persistent) { + if (info->state & (1<raid_disk < mddev->raid_disks) + rdev->raid_disk = info->raid_disk; + else + rdev->raid_disk = -1; + } else + super_types[mddev->major_version]. + validate_super(mddev, rdev); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->in_sync = 0; /* just to be sure */ rdev->raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); @@ -3722,6 +3753,14 @@ void md_check_recovery(mddev_t *mddev) mddev->pers->spare_active(mddev); } md_update_sb(mddev); + + /* if array is no-longer degraded, then any saved_raid_disk + * information must be scrapped + */ + if (!mddev->degraded) + ITERATE_RDEV(mddev,rdev,rtmp) + rdev->saved_raid_disk = -1; + mddev->recovery = 0; /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); diff -puN drivers/md/raid1.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive drivers/md/raid1.c --- 25/drivers/md/raid1.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive 2005-03-10 19:10:05.000000000 -0800 +++ 25-akpm/drivers/md/raid1.c 2005-03-10 19:10:05.000000000 -0800 @@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev { conf_t *conf = mddev->private; int found = 0; - int mirror; + int mirror = 0; mirror_info_t *p; + if (rdev->saved_raid_disk >= 0 && + conf->mirrors[rdev->saved_raid_disk].rdev == NULL) + mirror = rdev->saved_raid_disk; for (mirror=0; mirror < mddev->raid_disks; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { @@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev p->head_position = 0; rdev->raid_disk = mirror; found = 1; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; p->rdev = rdev; break; } diff -puN include/linux/raid/md_k.h~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive include/linux/raid/md_k.h --- 25/include/linux/raid/md_k.h~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive 2005-03-10 19:10:05.000000000 -0800 +++ 25-akpm/include/linux/raid/md_k.h 2005-03-10 19:10:05.000000000 -0800 @@ -183,6 +183,10 @@ struct mdk_rdev_s int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ + int saved_raid_disk; /* role that device used to have in the + * array and could again if we did a partial + * resync from the bitmap + */ atomic_t nr_pending; /* number of pending requests. * only maintained for arrays that _