aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-02-12 13:16:09 -0800
committerJunio C Hamano <gitster@pobox.com>2024-02-12 13:16:10 -0800
commitcf4a3bd8f1dc85b20766efe1381511fb4cb15a4f (patch)
tree1f25403b3e1762306a898f66b044ab86e7dc974c
parentc875e0b8e036c12cfbf6531962108a063c7a821c (diff)
parent4950acae7d0db40c327003eff2621aaa2172322c (diff)
downloadgit-cf4a3bd8f1dc85b20766efe1381511fb4cb15a4f.tar.gz
Merge branch 'ps/reftable-multi-level-indices-fix'
Write multi-level indices for reftable has been corrected. * ps/reftable-multi-level-indices-fix: reftable: document reading and writing indices reftable/writer: fix writing multi-level indices reftable/writer: simplify writing index records reftable/writer: use correct type to iterate through index entries reftable/reader: be more careful about errors in indexed seeks
-rw-r--r--reftable/reader.c30
-rw-r--r--reftable/readwrite_test.c56
-rw-r--r--reftable/writer.c63
3 files changed, 122 insertions, 27 deletions
diff --git a/reftable/reader.c b/reftable/reader.c
index 64dc366fb1..6011d0aa04 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -508,8 +508,38 @@ static int reader_seek_indexed(struct reftable_reader *r,
if (err < 0)
goto done;
+ /*
+ * The index may consist of multiple levels, where each level may have
+ * multiple index blocks. We start by doing a linear search in the
+ * highest layer that identifies the relevant index block as well as
+ * the record inside that block that corresponds to our wanted key.
+ */
err = reader_seek_linear(&index_iter, &want_index);
+ if (err < 0)
+ goto done;
+
+ /*
+ * Traverse down the levels until we find a non-index entry.
+ */
while (1) {
+ /*
+ * In case we seek a record that does not exist the index iter
+ * will tell us that the iterator is over. This works because
+ * the last index entry of the current level will contain the
+ * last key it knows about. So in case our seeked key is larger
+ * than the last indexed key we know that it won't exist.
+ *
+ * There is one subtlety in the layout of the index section
+ * that makes this work as expected: the highest-level index is
+ * at end of the section and will point backwards and thus we
+ * start reading from the end of the index section, not the
+ * beginning.
+ *
+ * If that wasn't the case and the order was reversed then the
+ * linear seek would seek into the lower levels and traverse
+ * all levels of the index only to find out that the key does
+ * not exist.
+ */
err = table_iter_next(&index_iter, &index_result);
table_iter_block_done(&index_iter);
if (err != 0)
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 6b99daeaf2..853923397e 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -866,6 +866,61 @@ static void test_write_multiple_indices(void)
strbuf_release(&buf);
}
+static void test_write_multi_level_index(void)
+{
+ struct reftable_write_options opts = {
+ .block_size = 100,
+ };
+ struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT;
+ struct reftable_block_source source = { 0 };
+ struct reftable_iterator it = { 0 };
+ const struct reftable_stats *stats;
+ struct reftable_writer *writer;
+ struct reftable_reader *reader;
+ int err;
+
+ writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts);
+ reftable_writer_set_limits(writer, 1, 1);
+ for (size_t i = 0; i < 200; i++) {
+ struct reftable_ref_record ref = {
+ .update_index = 1,
+ .value_type = REFTABLE_REF_VAL1,
+ .value.val1 = {i},
+ };
+
+ strbuf_reset(&buf);
+ strbuf_addf(&buf, "refs/heads/%03" PRIuMAX, (uintmax_t)i);
+ ref.refname = buf.buf,
+
+ err = reftable_writer_add_ref(writer, &ref);
+ EXPECT_ERR(err);
+ }
+ reftable_writer_close(writer);
+
+ /*
+ * The written refs should be sufficiently large to result in a
+ * multi-level index.
+ */
+ stats = reftable_writer_stats(writer);
+ EXPECT(stats->ref_stats.max_index_level == 2);
+
+ block_source_from_strbuf(&source, &writer_buf);
+ err = reftable_new_reader(&reader, &source, "filename");
+ EXPECT_ERR(err);
+
+ /*
+ * Seeking the last ref should work as expected.
+ */
+ err = reftable_reader_seek_ref(reader, &it, "refs/heads/199");
+ EXPECT_ERR(err);
+
+ reftable_iterator_destroy(&it);
+ reftable_writer_free(writer);
+ reftable_reader_free(reader);
+ strbuf_release(&writer_buf);
+ strbuf_release(&buf);
+}
+
static void test_corrupt_table_empty(void)
{
struct strbuf buf = STRBUF_INIT;
@@ -916,5 +971,6 @@ int readwrite_test_main(int argc, const char *argv[])
RUN_TEST(test_write_object_id_length);
RUN_TEST(test_write_object_id_min_length);
RUN_TEST(test_write_multiple_indices);
+ RUN_TEST(test_write_multi_level_index);
return 0;
}
diff --git a/reftable/writer.c b/reftable/writer.c
index 92935baa70..e23953e498 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -379,20 +379,39 @@ int reftable_writer_add_logs(struct reftable_writer *w,
static int writer_finish_section(struct reftable_writer *w)
{
+ struct reftable_block_stats *bstats = NULL;
uint8_t typ = block_writer_type(w->block_writer);
uint64_t index_start = 0;
int max_level = 0;
- int threshold = w->opts.unpadded ? 1 : 3;
+ size_t threshold = w->opts.unpadded ? 1 : 3;
int before_blocks = w->stats.idx_stats.blocks;
- int err = writer_flush_block(w);
- int i = 0;
- struct reftable_block_stats *bstats = NULL;
+ int err;
+
+ err = writer_flush_block(w);
if (err < 0)
return err;
+ /*
+ * When the section we are about to index has a lot of blocks then the
+ * index itself may span across multiple blocks, as well. This would
+ * require a linear scan over index blocks only to find the desired
+ * indexed block, which is inefficient. Instead, we write a multi-level
+ * index where index records of level N+1 will refer to index blocks of
+ * level N. This isn't constant time, either, but at least logarithmic.
+ *
+ * This loop handles writing this multi-level index. Note that we write
+ * the lowest-level index pointing to the indexed blocks first. We then
+ * continue writing additional index levels until the current level has
+ * less blocks than the threshold so that the highest level will be at
+ * the end of the index section.
+ *
+ * Readers are thus required to start reading the index section from
+ * its end, which is why we set `index_start` to the beginning of the
+ * last index section.
+ */
while (w->index_len > threshold) {
struct reftable_index_record *idx = NULL;
- int idx_len = 0;
+ size_t i, idx_len;
max_level++;
index_start = w->next;
@@ -411,33 +430,26 @@ static int writer_finish_section(struct reftable_writer *w)
.idx = idx[i],
},
};
- if (block_writer_add(w->block_writer, &rec) == 0) {
- continue;
- }
- err = writer_flush_block(w);
+ err = writer_add_record(w, &rec);
if (err < 0)
return err;
+ }
- writer_reinit_block_writer(w, BLOCK_TYPE_INDEX);
+ err = writer_flush_block(w);
+ if (err < 0)
+ return err;
- err = block_writer_add(w->block_writer, &rec);
- if (err != 0) {
- /* write into fresh block should always succeed
- */
- abort();
- }
- }
- for (i = 0; i < idx_len; i++) {
+ for (i = 0; i < idx_len; i++)
strbuf_release(&idx[i].last_key);
- }
reftable_free(idx);
}
- err = writer_flush_block(w);
- if (err < 0)
- return err;
-
+ /*
+ * The index may still contain a number of index blocks lower than the
+ * threshold. Clear it so that these entries don't leak into the next
+ * index section.
+ */
writer_clear_index(w);
bstats = writer_reftable_block_stats(w, typ);
@@ -630,11 +642,8 @@ done:
static void writer_clear_index(struct reftable_writer *w)
{
- int i = 0;
- for (i = 0; i < w->index_len; i++) {
+ for (size_t i = 0; i < w->index_len; i++)
strbuf_release(&w->index[i].last_key);
- }
-
FREE_AND_NULL(w->index);
w->index_len = 0;
w->index_cap = 0;