aboutsummaryrefslogtreecommitdiffstats
path: root/reftable
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2024-02-01 08:52:12 +0100
committerJunio C Hamano <gitster@pobox.com>2024-02-01 11:11:33 -0800
commit4950acae7d0db40c327003eff2621aaa2172322c (patch)
tree01ade781c71cd412318e81344e3aa2d99adad86d /reftable
parente7485601ca4da6a09c6488add181609cffec5799 (diff)
downloadgit-4950acae7d0db40c327003eff2621aaa2172322c.tar.gz
reftable: document reading and writing indices
The way the index gets written and read is not trivial at all and requires the reader to piece together a bunch of parts to figure out how it works. Add some documentation to hopefully make this easier to understand for the next reader. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'reftable')
-rw-r--r--reftable/reader.c27
-rw-r--r--reftable/writer.c23
2 files changed, 50 insertions, 0 deletions
diff --git a/reftable/reader.c b/reftable/reader.c
index 278f727a3d..6011d0aa04 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -508,11 +508,38 @@ static int reader_seek_indexed(struct reftable_reader *r,
if (err < 0)
goto done;
+ /*
+ * The index may consist of multiple levels, where each level may have
+ * multiple index blocks. We start by doing a linear search in the
+ * highest layer that identifies the relevant index block as well as
+ * the record inside that block that corresponds to our wanted key.
+ */
err = reader_seek_linear(&index_iter, &want_index);
if (err < 0)
goto done;
+ /*
+ * Traverse down the levels until we find a non-index entry.
+ */
while (1) {
+ /*
+ * In case we seek a record that does not exist the index iter
+ * will tell us that the iterator is over. This works because
+ * the last index entry of the current level will contain the
+ * last key it knows about. So in case our seeked key is larger
+ * than the last indexed key we know that it won't exist.
+ *
+ * There is one subtlety in the layout of the index section
+ * that makes this work as expected: the highest-level index is
+ * at end of the section and will point backwards and thus we
+ * start reading from the end of the index section, not the
+ * beginning.
+ *
+ * If that wasn't the case and the order was reversed then the
+ * linear seek would seek into the lower levels and traverse
+ * all levels of the index only to find out that the key does
+ * not exist.
+ */
err = table_iter_next(&index_iter, &index_result);
table_iter_block_done(&index_iter);
if (err != 0)
diff --git a/reftable/writer.c b/reftable/writer.c
index 0349094d29..e23953e498 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -391,6 +391,24 @@ static int writer_finish_section(struct reftable_writer *w)
if (err < 0)
return err;
+ /*
+ * When the section we are about to index has a lot of blocks then the
+ * index itself may span across multiple blocks, as well. This would
+ * require a linear scan over index blocks only to find the desired
+ * indexed block, which is inefficient. Instead, we write a multi-level
+ * index where index records of level N+1 will refer to index blocks of
+ * level N. This isn't constant time, either, but at least logarithmic.
+ *
+ * This loop handles writing this multi-level index. Note that we write
+ * the lowest-level index pointing to the indexed blocks first. We then
+ * continue writing additional index levels until the current level has
+ * less blocks than the threshold so that the highest level will be at
+ * the end of the index section.
+ *
+ * Readers are thus required to start reading the index section from
+ * its end, which is why we set `index_start` to the beginning of the
+ * last index section.
+ */
while (w->index_len > threshold) {
struct reftable_index_record *idx = NULL;
size_t i, idx_len;
@@ -427,6 +445,11 @@ static int writer_finish_section(struct reftable_writer *w)
reftable_free(idx);
}
+ /*
+ * The index may still contain a number of index blocks lower than the
+ * threshold. Clear it so that these entries don't leak into the next
+ * index section.
+ */
writer_clear_index(w);
bstats = writer_reftable_block_stats(w, typ);