diff options
author | Junio C Hamano <gitster@pobox.com> | 2024-02-12 13:16:09 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2024-02-12 13:16:10 -0800 |
commit | cf4a3bd8f1dc85b20766efe1381511fb4cb15a4f (patch) | |
tree | 1f25403b3e1762306a898f66b044ab86e7dc974c | |
parent | c875e0b8e036c12cfbf6531962108a063c7a821c (diff) | |
parent | 4950acae7d0db40c327003eff2621aaa2172322c (diff) | |
download | git-cf4a3bd8f1dc85b20766efe1381511fb4cb15a4f.tar.gz |
Merge branch 'ps/reftable-multi-level-indices-fix'
Write multi-level indices for reftable has been corrected.
* ps/reftable-multi-level-indices-fix:
reftable: document reading and writing indices
reftable/writer: fix writing multi-level indices
reftable/writer: simplify writing index records
reftable/writer: use correct type to iterate through index entries
reftable/reader: be more careful about errors in indexed seeks
-rw-r--r-- | reftable/reader.c | 30 | ||||
-rw-r--r-- | reftable/readwrite_test.c | 56 | ||||
-rw-r--r-- | reftable/writer.c | 63 |
3 files changed, 122 insertions, 27 deletions
diff --git a/reftable/reader.c b/reftable/reader.c index 64dc366fb1..6011d0aa04 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -508,8 +508,38 @@ static int reader_seek_indexed(struct reftable_reader *r, if (err < 0) goto done; + /* + * The index may consist of multiple levels, where each level may have + * multiple index blocks. We start by doing a linear search in the + * highest layer that identifies the relevant index block as well as + * the record inside that block that corresponds to our wanted key. + */ err = reader_seek_linear(&index_iter, &want_index); + if (err < 0) + goto done; + + /* + * Traverse down the levels until we find a non-index entry. + */ while (1) { + /* + * In case we seek a record that does not exist the index iter + * will tell us that the iterator is over. This works because + * the last index entry of the current level will contain the + * last key it knows about. So in case our seeked key is larger + * than the last indexed key we know that it won't exist. + * + * There is one subtlety in the layout of the index section + * that makes this work as expected: the highest-level index is + * at end of the section and will point backwards and thus we + * start reading from the end of the index section, not the + * beginning. + * + * If that wasn't the case and the order was reversed then the + * linear seek would seek into the lower levels and traverse + * all levels of the index only to find out that the key does + * not exist. + */ err = table_iter_next(&index_iter, &index_result); table_iter_block_done(&index_iter); if (err != 0) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 6b99daeaf2..853923397e 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -866,6 +866,61 @@ static void test_write_multiple_indices(void) strbuf_release(&buf); } +static void test_write_multi_level_index(void) +{ + struct reftable_write_options opts = { + .block_size = 100, + }; + struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT; + struct reftable_block_source source = { 0 }; + struct reftable_iterator it = { 0 }; + const struct reftable_stats *stats; + struct reftable_writer *writer; + struct reftable_reader *reader; + int err; + + writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); + reftable_writer_set_limits(writer, 1, 1); + for (size_t i = 0; i < 200; i++) { + struct reftable_ref_record ref = { + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = {i}, + }; + + strbuf_reset(&buf); + strbuf_addf(&buf, "refs/heads/%03" PRIuMAX, (uintmax_t)i); + ref.refname = buf.buf, + + err = reftable_writer_add_ref(writer, &ref); + EXPECT_ERR(err); + } + reftable_writer_close(writer); + + /* + * The written refs should be sufficiently large to result in a + * multi-level index. + */ + stats = reftable_writer_stats(writer); + EXPECT(stats->ref_stats.max_index_level == 2); + + block_source_from_strbuf(&source, &writer_buf); + err = reftable_new_reader(&reader, &source, "filename"); + EXPECT_ERR(err); + + /* + * Seeking the last ref should work as expected. + */ + err = reftable_reader_seek_ref(reader, &it, "refs/heads/199"); + EXPECT_ERR(err); + + reftable_iterator_destroy(&it); + reftable_writer_free(writer); + reftable_reader_free(reader); + strbuf_release(&writer_buf); + strbuf_release(&buf); +} + static void test_corrupt_table_empty(void) { struct strbuf buf = STRBUF_INIT; @@ -916,5 +971,6 @@ int readwrite_test_main(int argc, const char *argv[]) RUN_TEST(test_write_object_id_length); RUN_TEST(test_write_object_id_min_length); RUN_TEST(test_write_multiple_indices); + RUN_TEST(test_write_multi_level_index); return 0; } diff --git a/reftable/writer.c b/reftable/writer.c index 92935baa70..e23953e498 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -379,20 +379,39 @@ int reftable_writer_add_logs(struct reftable_writer *w, static int writer_finish_section(struct reftable_writer *w) { + struct reftable_block_stats *bstats = NULL; uint8_t typ = block_writer_type(w->block_writer); uint64_t index_start = 0; int max_level = 0; - int threshold = w->opts.unpadded ? 1 : 3; + size_t threshold = w->opts.unpadded ? 1 : 3; int before_blocks = w->stats.idx_stats.blocks; - int err = writer_flush_block(w); - int i = 0; - struct reftable_block_stats *bstats = NULL; + int err; + + err = writer_flush_block(w); if (err < 0) return err; + /* + * When the section we are about to index has a lot of blocks then the + * index itself may span across multiple blocks, as well. This would + * require a linear scan over index blocks only to find the desired + * indexed block, which is inefficient. Instead, we write a multi-level + * index where index records of level N+1 will refer to index blocks of + * level N. This isn't constant time, either, but at least logarithmic. + * + * This loop handles writing this multi-level index. Note that we write + * the lowest-level index pointing to the indexed blocks first. We then + * continue writing additional index levels until the current level has + * less blocks than the threshold so that the highest level will be at + * the end of the index section. + * + * Readers are thus required to start reading the index section from + * its end, which is why we set `index_start` to the beginning of the + * last index section. + */ while (w->index_len > threshold) { struct reftable_index_record *idx = NULL; - int idx_len = 0; + size_t i, idx_len; max_level++; index_start = w->next; @@ -411,33 +430,26 @@ static int writer_finish_section(struct reftable_writer *w) .idx = idx[i], }, }; - if (block_writer_add(w->block_writer, &rec) == 0) { - continue; - } - err = writer_flush_block(w); + err = writer_add_record(w, &rec); if (err < 0) return err; + } - writer_reinit_block_writer(w, BLOCK_TYPE_INDEX); + err = writer_flush_block(w); + if (err < 0) + return err; - err = block_writer_add(w->block_writer, &rec); - if (err != 0) { - /* write into fresh block should always succeed - */ - abort(); - } - } - for (i = 0; i < idx_len; i++) { + for (i = 0; i < idx_len; i++) strbuf_release(&idx[i].last_key); - } reftable_free(idx); } - err = writer_flush_block(w); - if (err < 0) - return err; - + /* + * The index may still contain a number of index blocks lower than the + * threshold. Clear it so that these entries don't leak into the next + * index section. + */ writer_clear_index(w); bstats = writer_reftable_block_stats(w, typ); @@ -630,11 +642,8 @@ done: static void writer_clear_index(struct reftable_writer *w) { - int i = 0; - for (i = 0; i < w->index_len; i++) { + for (size_t i = 0; i < w->index_len; i++) strbuf_release(&w->index[i].last_key); - } - FREE_AND_NULL(w->index); w->index_len = 0; w->index_cap = 0; |