aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2024-04-08 14:16:59 +0200
committerJunio C Hamano <gitster@pobox.com>2024-04-15 10:36:09 -0700
commit15a60b747e4f0e0d11353f8e89bc9ce7b36c5512 (patch)
tree2289099212c7955de685d40715c5cc76b2865d26
parentdd347bbce6053538d3332e6e8e499a3bebdbd251 (diff)
downloadgit-15a60b747e4f0e0d11353f8e89bc9ce7b36c5512.tar.gz
reftable/block: open-code call to `uncompress2()`
The reftable format stores log blocks in a compressed format. Thus, whenever we want to read such a block we first need to decompress it. This is done by calling the convenience function `uncompress2()` of the zlib library, which is a simple wrapper that manages the lifecycle of the `zstream` structure for us. While nice for one-off inflation of data, when iterating through reflogs we will likely end up inflating many such log blocks. This requires us to reallocate the state of the `zstream` every single time, which adds up over time. It would thus be great to reuse the `zstream` instead of discarding it after every inflation. Open-code the call to `uncompress2()` such that we can start reusing the `zstream` in the subsequent commit. Note that our open-coded variant is different from `uncompress2()` in two ways: - We do not loop around `inflate()` until we have processed all input. As our input is limited by the maximum block size, which is 16MB, we should not hit limits of `inflate()`. - We use `Z_FINISH` instead of `Z_NO_FLUSH`. Quoting the `inflate()` documentation: "inflate() should normally be called until it returns Z_STREAM_END or an error. However if all decompression is to be performed in a single step (a single call of inflate), the parameter flush should be set to Z_FINISH." Furthermore, "Z_FINISH also informs inflate to not maintain a sliding window if the stream completes, which reduces inflate's memory footprint." Other than that this commit is expected to be functionally equivalent and does not yet reuse the `zstream`. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--reftable/block.c38
1 files changed, 28 insertions, 10 deletions
diff --git a/reftable/block.c b/reftable/block.c
index 9460273290..435922b569 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -195,10 +195,10 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
}
if (typ == BLOCK_TYPE_LOG) {
- int block_header_skip = 4 + header_off;
- uLongf dst_len = sz - block_header_skip; /* total size of dest
- buffer. */
- uLongf src_len = block->len - block_header_skip;
+ uint32_t block_header_skip = 4 + header_off;
+ uLong dst_len = sz - block_header_skip;
+ uLong src_len = block->len - block_header_skip;
+ z_stream stream = {0};
/* Log blocks specify the *uncompressed* size in their header. */
REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
@@ -207,15 +207,33 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
/* Copy over the block header verbatim. It's not compressed. */
memcpy(br->uncompressed_data, block->data, block_header_skip);
- /* Uncompress */
- if (Z_OK !=
- uncompress2(br->uncompressed_data + block_header_skip, &dst_len,
- block->data + block_header_skip, &src_len)) {
+ err = inflateInit(&stream);
+ if (err != Z_OK) {
err = REFTABLE_ZLIB_ERROR;
goto done;
}
- if (dst_len + block_header_skip != sz) {
+ stream.next_in = block->data + block_header_skip;
+ stream.avail_in = src_len;
+ stream.next_out = br->uncompressed_data + block_header_skip;
+ stream.avail_out = dst_len;
+
+ /*
+ * We know both input as well as output size, and we know that
+ * the sizes should never be bigger than `uInt_MAX` because
+ * blocks can at most be 16MB large. We can thus use `Z_FINISH`
+ * here to instruct zlib to inflate the data in one go, which
+ * is more efficient than using `Z_NO_FLUSH`.
+ */
+ err = inflate(&stream, Z_FINISH);
+ inflateEnd(&stream);
+ if (err != Z_STREAM_END) {
+ err = REFTABLE_ZLIB_ERROR;
+ goto done;
+ }
+ err = 0;
+
+ if (stream.total_out + block_header_skip != sz) {
err = REFTABLE_FORMAT_ERROR;
goto done;
}
@@ -224,7 +242,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
reftable_block_done(block);
block->data = br->uncompressed_data;
block->len = sz;
- full_block_size = src_len + block_header_skip;
+ full_block_size = src_len + block_header_skip - stream.avail_in;
} else if (full_block_size == 0) {
full_block_size = sz;
} else if (sz < full_block_size && sz < block->len &&