aboutsummaryrefslogtreecommitdiffstats
path: root/reftable
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-04-16 14:50:30 -0700
committerJunio C Hamano <gitster@pobox.com>2024-04-16 14:50:30 -0700
commit82a31ec32441cd06daa5e0397a73f4159cdaad4b (patch)
treee24ddf13b3eda8b9d5927570807af38238f3e837 /reftable
parent2b49e41155d826d40ede07dfd4d34a7a36f9f64b (diff)
parenta949ebd342440049a1ac77ca675f66884eae4187 (diff)
downloadgit-82a31ec32441cd06daa5e0397a73f4159cdaad4b.tar.gz
Merge branch 'jt/reftable-geometric-compaction'
The strategy to compact multiple tables of reftables after many operations accumulate many entries has been improved to avoid accumulating too many tables uncollected. * jt/reftable-geometric-compaction: reftable/stack: use geometric table compaction reftable/stack: add env to disable autocompaction reftable/stack: expose option to disable auto-compaction
Diffstat (limited to 'reftable')
-rw-r--r--reftable/reftable-writer.h3
-rw-r--r--reftable/stack.c125
-rw-r--r--reftable/stack.h4
-rw-r--r--reftable/stack_test.c77
4 files changed, 85 insertions, 124 deletions
diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index 7c7cae5f99..155bf0bbe2 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -46,6 +46,9 @@ struct reftable_write_options {
* is a single line, and add '\n' if missing.
*/
unsigned exact_log_message : 1;
+
+ /* boolean: Prevent auto-compaction of tables. */
+ unsigned disable_auto_compact : 1;
};
/* reftable_block_stats holds statistics for a single block type */
diff --git a/reftable/stack.c b/reftable/stack.c
index dde50b61d6..80266bcbab 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -680,7 +680,7 @@ int reftable_addition_commit(struct reftable_addition *add)
if (err)
goto done;
- if (!add->stack->disable_auto_compact) {
+ if (!add->stack->config.disable_auto_compact) {
/*
* Auto-compact the stack to keep the number of tables in
* control. It is possible that a concurrent writer is already
@@ -1216,75 +1216,76 @@ static int segment_size(struct segment *s)
return s->end - s->start;
}
-int fastlog2(uint64_t sz)
-{
- int l = 0;
- if (sz == 0)
- return 0;
- for (; sz; sz /= 2) {
- l++;
- }
- return l - 1;
-}
-
-struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n)
-{
- struct segment *segs = reftable_calloc(n, sizeof(*segs));
- struct segment cur = { 0 };
- size_t next = 0, i;
-
- if (n == 0) {
- *seglen = 0;
- return segs;
- }
- for (i = 0; i < n; i++) {
- int log = fastlog2(sizes[i]);
- if (cur.log != log && cur.bytes > 0) {
- struct segment fresh = {
- .start = i,
- };
-
- segs[next++] = cur;
- cur = fresh;
- }
-
- cur.log = log;
- cur.end = i + 1;
- cur.bytes += sizes[i];
- }
- segs[next++] = cur;
- *seglen = next;
- return segs;
-}
-
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n)
{
- struct segment min_seg = {
- .log = 64,
- };
- struct segment *segs;
- size_t seglen = 0, i;
-
- segs = sizes_to_segments(&seglen, sizes, n);
- for (i = 0; i < seglen; i++) {
- if (segment_size(&segs[i]) == 1)
- continue;
+ struct segment seg = { 0 };
+ uint64_t bytes;
+ size_t i;
- if (segs[i].log < min_seg.log)
- min_seg = segs[i];
- }
+ /*
+ * If there are no tables or only a single one then we don't have to
+ * compact anything. The sequence is geometric by definition already.
+ */
+ if (n <= 1)
+ return seg;
- while (min_seg.start > 0) {
- size_t prev = min_seg.start - 1;
- if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev]))
+ /*
+ * Find the ending table of the compaction segment needed to restore the
+ * geometric sequence. Note that the segment end is exclusive.
+ *
+ * To do so, we iterate backwards starting from the most recent table
+ * until a valid segment end is found. If the preceding table is smaller
+ * than the current table multiplied by the geometric factor (2), the
+ * compaction segment end has been identified.
+ *
+ * Tables after the ending point are not added to the byte count because
+ * they are already valid members of the geometric sequence. Due to the
+ * properties of a geometric sequence, it is not possible for the sum of
+ * these tables to exceed the value of the ending point table.
+ *
+ * Example table size sequence requiring no compaction:
+ * 64, 32, 16, 8, 4, 2, 1
+ *
+ * Example table size sequence where compaction segment end is set to
+ * the last table. Since the segment end is exclusive, the last table is
+ * excluded during subsequent compaction and the table with size 3 is
+ * the final table included:
+ * 64, 32, 16, 8, 4, 3, 1
+ */
+ for (i = n - 1; i > 0; i--) {
+ if (sizes[i - 1] < sizes[i] * 2) {
+ seg.end = i + 1;
+ bytes = sizes[i];
break;
+ }
+ }
- min_seg.start = prev;
- min_seg.bytes += sizes[prev];
+ /*
+ * Find the starting table of the compaction segment by iterating
+ * through the remaining tables and keeping track of the accumulated
+ * size of all tables seen from the segment end table. The previous
+ * table is compared to the accumulated size because the tables from the
+ * segment end are merged backwards recursively.
+ *
+ * Note that we keep iterating even after we have found the first
+ * starting point. This is because there may be tables in the stack
+ * preceding that first starting point which violate the geometric
+ * sequence.
+ *
+ * Example compaction segment start set to table with size 32:
+ * 128, 32, 16, 8, 4, 3, 1
+ */
+ for (; i > 0; i--) {
+ uint64_t curr = bytes;
+ bytes += sizes[i - 1];
+
+ if (sizes[i - 1] < curr * 2) {
+ seg.start = i - 1;
+ seg.bytes = bytes;
+ }
}
- reftable_free(segs);
- return min_seg;
+ return seg;
}
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
diff --git a/reftable/stack.h b/reftable/stack.h
index d919455669..d43efa4760 100644
--- a/reftable/stack.h
+++ b/reftable/stack.h
@@ -19,7 +19,6 @@ struct reftable_stack {
int list_fd;
char *reftable_dir;
- int disable_auto_compact;
struct reftable_write_options config;
@@ -33,12 +32,9 @@ int read_lines(const char *filename, char ***lines);
struct segment {
size_t start, end;
- int log;
uint64_t bytes;
};
-int fastlog2(uint64_t sz);
-struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n);
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n);
#endif
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 351e35bd86..1df3ffce52 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -325,7 +325,7 @@ static void test_reftable_stack_transaction_api_performs_auto_compaction(void)
* we can ensure that we indeed honor this setting and have
* better control over when exactly auto compaction runs.
*/
- st->disable_auto_compact = i != n;
+ st->config.disable_auto_compact = i != n;
err = reftable_stack_new_addition(&add, st);
EXPECT_ERR(err);
@@ -497,6 +497,7 @@ static void test_reftable_stack_add(void)
struct reftable_write_options cfg = {
.exact_log_message = 1,
.default_permissions = 0660,
+ .disable_auto_compact = 1,
};
struct reftable_stack *st = NULL;
char *dir = get_tmp_dir(__LINE__);
@@ -508,7 +509,6 @@ static void test_reftable_stack_add(void)
err = reftable_new_stack(&st, dir, cfg);
EXPECT_ERR(err);
- st->disable_auto_compact = 1;
for (i = 0; i < N; i++) {
char buf[256];
@@ -770,59 +770,13 @@ static void test_reftable_stack_hash_id(void)
clear_dir(dir);
}
-static void test_log2(void)
-{
- EXPECT(1 == fastlog2(3));
- EXPECT(2 == fastlog2(4));
- EXPECT(2 == fastlog2(5));
-}
-
-static void test_sizes_to_segments(void)
-{
- uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 };
- /* .................0 1 2 3 4 5 */
-
- size_t seglen = 0;
- struct segment *segs =
- sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes));
- EXPECT(segs[2].log == 3);
- EXPECT(segs[2].start == 5);
- EXPECT(segs[2].end == 6);
-
- EXPECT(segs[1].log == 2);
- EXPECT(segs[1].start == 2);
- EXPECT(segs[1].end == 5);
- reftable_free(segs);
-}
-
-static void test_sizes_to_segments_empty(void)
-{
- size_t seglen = 0;
- struct segment *segs = sizes_to_segments(&seglen, NULL, 0);
- EXPECT(seglen == 0);
- reftable_free(segs);
-}
-
-static void test_sizes_to_segments_all_equal(void)
-{
- uint64_t sizes[] = { 5, 5 };
- size_t seglen = 0;
- struct segment *segs =
- sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes));
- EXPECT(seglen == 1);
- EXPECT(segs[0].start == 0);
- EXPECT(segs[0].end == 2);
- reftable_free(segs);
-}
-
static void test_suggest_compaction_segment(void)
{
- uint64_t sizes[] = { 128, 64, 17, 16, 9, 9, 9, 16, 16 };
- /* .................0 1 2 3 4 5 6 */
+ uint64_t sizes[] = { 512, 64, 17, 16, 9, 9, 9, 16, 2, 16 };
struct segment min =
suggest_compaction_segment(sizes, ARRAY_SIZE(sizes));
- EXPECT(min.start == 2);
- EXPECT(min.end == 7);
+ EXPECT(min.start == 1);
+ EXPECT(min.end == 10);
}
static void test_suggest_compaction_segment_nothing(void)
@@ -933,9 +887,21 @@ static void test_empty_add(void)
reftable_stack_destroy(st2);
}
+static int fastlog2(uint64_t sz)
+{
+ int l = 0;
+ if (sz == 0)
+ return 0;
+ for (; sz; sz /= 2)
+ l++;
+ return l - 1;
+}
+
static void test_reftable_stack_auto_compaction(void)
{
- struct reftable_write_options cfg = { 0 };
+ struct reftable_write_options cfg = {
+ .disable_auto_compact = 1,
+ };
struct reftable_stack *st = NULL;
char *dir = get_tmp_dir(__LINE__);
@@ -945,7 +911,6 @@ static void test_reftable_stack_auto_compaction(void)
err = reftable_new_stack(&st, dir, cfg);
EXPECT_ERR(err);
- st->disable_auto_compact = 1; /* call manually below for coverage. */
for (i = 0; i < N; i++) {
char name[100];
struct reftable_ref_record ref = {
@@ -994,7 +959,7 @@ static void test_reftable_stack_add_performs_auto_compaction(void)
* we can ensure that we indeed honor this setting and have
* better control over when exactly auto compaction runs.
*/
- st->disable_auto_compact = i != n;
+ st->config.disable_auto_compact = i != n;
strbuf_reset(&refname);
strbuf_addf(&refname, "branch-%04d", i);
@@ -1121,7 +1086,6 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
int stack_test_main(int argc, const char *argv[])
{
RUN_TEST(test_empty_add);
- RUN_TEST(test_log2);
RUN_TEST(test_names_equal);
RUN_TEST(test_parse_names);
RUN_TEST(test_read_file);
@@ -1142,9 +1106,6 @@ int stack_test_main(int argc, const char *argv[])
RUN_TEST(test_reftable_stack_update_index_check);
RUN_TEST(test_reftable_stack_uptodate);
RUN_TEST(test_reftable_stack_validate_refname);
- RUN_TEST(test_sizes_to_segments);
- RUN_TEST(test_sizes_to_segments_all_equal);
- RUN_TEST(test_sizes_to_segments_empty);
RUN_TEST(test_suggest_compaction_segment);
RUN_TEST(test_suggest_compaction_segment_nothing);
return 0;