xfs_scrub: fix read verify disk error handling strategy

The error handling strategy for media errors is totally bogus.

First of all, short reads are entirely unhandled -- when we encounter a short read, we know the disk was able to feed us the beginning of what we asked for, so we need to single-step through the remainder to try to capture the exact error that we hit.

Second, an actual IO error causes the entire region to be marked bad even though it could be just a few MB of a multi-gigabyte extent that's bad. Therefore, single-step each block in the IO request until we stop getting IO errors to find out if all the blocks are bad or if it's just that extent.

Third, fix the fact that the loop updates its own counter variables with the length fed to read(), which doesn't necessarily have anything to do with the amount of data that the read actually produced.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[sandeen: change "io_error" to "read_error"]
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
author: Darrick J. Wong <darrick.wong@oracle.com> 2019-10-16 22:35:26 -0400
committer: Eric Sandeen <sandeen@sandeen.net> 2019-10-16 22:35:26 -0400
commit: 27464242956facd1cefb68164f546be12c88f6ba (patch)
tree: 317af0cb94bac92c33a30be53acab3749a27bdda /scrub
parent: 323ef14c89b22fdf9265bba4f5f33b7bc891a33e (diff)
download: xfsprogs-dev-27464242956facd1cefb68164f546be12c88f6ba.tar.gz
1 files changed, 74 insertions, 12 deletions
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index 231df80266..cba1b2d418 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -173,30 +173,92 @@ read_verify(
 	struct read_verify		*rv = arg;
 	struct read_verify_pool		*rvp;
 	unsigned long long		verified = 0;
+	ssize_t				io_max_size;
 	ssize_t				sz;
 	ssize_t				len;
+	int				read_error;
 	int				ret;
 
 	rvp = (struct read_verify_pool *)wq->wq_ctx;
+	if (rvp->runtime_error)
+		return;
+
+	io_max_size = RVP_IO_MAX_SIZE;
+
 	while (rv->io_length > 0) {
-		len = min(rv->io_length, RVP_IO_MAX_SIZE);
+		read_error = 0;
+		len = min(rv->io_length, io_max_size);
 		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
 				rv->io_start, len);
 		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
 				len);
-		if (sz < 0) {
-			dbg_printf("IOERR %d %"PRIu64" %zu\n",
-					rvp->disk->d_fd, rv->io_start, len);
-			/* IO error, so try the next logical block. */
-			len = rvp->miniosz;
-			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len,
-					errno, rv->io_end_arg);
+		if (sz == len && io_max_size < rvp->miniosz) {
+			/*
+			 * If the verify request was 100% successful and less
+			 * than a single block in length, we were trying to
+			 * read to the end of a block after a short read.  That
+			 * suggests there's something funny with this device,
+			 * so single-step our way through the rest of the @rv
+			 * range.
+			 */
+			io_max_size = rvp->miniosz;
+		} else if (sz < 0) {
+			read_error = errno;
+
+			/* Runtime error, bail out... */
+			if (read_error != EIO && read_error != EILSEQ) {
+				rvp->runtime_error = read_error;
+				return;
+			}
+
+			/*
+			 * A direct read encountered an error while performing
+			 * a multi-block read.  Reduce the transfer size to a
+			 * single block so that we can identify the exact range
+			 * of bad blocks and good blocks.  We single-step all
+			 * the way to the end of the @rv range, (re)starting
+			 * with the block that just failed.
+			 */
+			if (io_max_size > rvp->miniosz) {
+				io_max_size = rvp->miniosz;
+				continue;
+			}
+
+			/*
+			 * A direct read hit an error while we were stepping
+			 * through single blocks.  Mark everything bad from
+			 * io_start to the next miniosz block.
+			 */
+			sz = rvp->miniosz - (rv->io_start % rvp->miniosz);
+			dbg_printf("IOERR %d @ %"PRIu64" %zu err %d\n",
+					rvp->disk->d_fd, rv->io_start, sz,
+					read_error);
+			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz,
+					read_error, rv->io_end_arg);
+		} else if (sz < len) {
+			/*
+			 * A short direct read suggests that we might have hit
+			 * an IO error midway through the read but still had to
+			 * return the number of bytes that were actually read.
+			 *
+			 * We need to force an EIO, so try reading the rest of
+			 * the block (if it was a partial block read) or the
+			 * next full block.
+			 */
+			io_max_size = rvp->miniosz - (sz % rvp->miniosz);
+			dbg_printf("SHORT %d READ @ %"PRIu64" %zu try for %zd\n",
+					rvp->disk->d_fd, rv->io_start, sz,
+					io_max_size);
+		} else {
+			/* We should never get back more bytes than we asked. */
+			assert(sz == len);
 		}
 
-		progress_add(len);
-		verified += len;
-		rv->io_start += len;
-		rv->io_length -= len;
+		progress_add(sz);
+		if (read_error == 0)
+			verified += sz;
+		rv->io_start += sz;
+		rv->io_length -= sz;
 	}
 
 	free(rv);
author	Darrick J. Wong <darrick.wong@oracle.com>	2019-10-16 22:35:26 -0400
committer	Eric Sandeen <sandeen@sandeen.net>	2019-10-16 22:35:26 -0400
commit	27464242956facd1cefb68164f546be12c88f6ba (patch)
tree	317af0cb94bac92c33a30be53acab3749a27bdda /scrub
parent	323ef14c89b22fdf9265bba4f5f33b7bc891a33e (diff)
download	xfsprogs-dev-27464242956facd1cefb68164f546be12c88f6ba.tar.gz