From e32b8ece640fc700c5a74dd53e6cae8b1a893a6d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:29 +0100 Subject: reftable: wrap EXPECT macros in do/while The `EXPECT` macros used by the reftable test framework are all using a single `if` statement with the actual condition. This results in weird syntax when using them in if/else statements like the following: ``` if (foo) EXPECT(foo == 2) else EXPECT(bar == 2) ``` Note that there need not be a trailing semicolon. Furthermore, it is not immediately obvious whether the else now belongs to the `if (foo)` or whether it belongs to the expanded `if (foo == 2)` from the macro. Fix this by wrapping the macros in a do/while loop. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/test_framework.h | 58 ++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/reftable/test_framework.h b/reftable/test_framework.h index 774cb275bf..ee44f735ae 100644 --- a/reftable/test_framework.h +++ b/reftable/test_framework.h @@ -12,32 +12,38 @@ https://developers.google.com/open-source/licenses/bsd #include "system.h" #include "reftable-error.h" -#define EXPECT_ERR(c) \ - if (c != 0) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: error == %d (%s), want 0\n", \ - __FILE__, __LINE__, c, reftable_error_str(c)); \ - abort(); \ - } - -#define EXPECT_STREQ(a, b) \ - if (strcmp(a, b)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \ - __LINE__, #a, a, #b, b); \ - abort(); \ - } - -#define EXPECT(c) \ - if (!(c)) { \ - fflush(stderr); \ - fflush(stdout); \ - fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \ - __LINE__, #c); \ - abort(); \ - } +#define EXPECT_ERR(c) \ + do { \ + if (c != 0) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s: %d: error == %d (%s), want 0\n", \ + __FILE__, __LINE__, c, reftable_error_str(c)); \ + abort(); \ + } \ + } while (0) + +#define EXPECT_STREQ(a, b) \ + do { \ + if (strcmp(a, b)) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s:%d: %s (%s) != %s (%s)\n", __FILE__, \ + __LINE__, #a, a, #b, b); \ + abort(); \ + } \ + } while (0) + +#define EXPECT(c) \ + do { \ + if (!(c)) { \ + fflush(stderr); \ + fflush(stdout); \ + fprintf(stderr, "%s: %d: failed assertion %s\n", __FILE__, \ + __LINE__, #c); \ + abort(); \ + } \ + } while (0) #define RUN_TEST(f) \ fprintf(stderr, "running %s\n", #f); \ -- cgit 1.2.3-korg From 917a2b3ce990717ccce5ff372766d9ffd41239f6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:34 +0100 Subject: reftable: handle interrupted reads There are calls to pread(3P) and read(3P) where we don't properly handle interrupts. Convert them to use `pread_in_full()` and `read_in_full()`, respectively. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/blocksource.c | 2 +- reftable/stack.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 8331b34e82..a1ea304429 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -109,7 +109,7 @@ static int file_read_block(void *v, struct reftable_block *dest, uint64_t off, struct file_block_source *b = v; assert(off + size <= b->size); dest->data = reftable_malloc(size); - if (pread(b->fd, dest->data, size, off) != size) + if (pread_in_full(b->fd, dest->data, size, off) != size) return -1; dest->len = size; return size; diff --git a/reftable/stack.c b/reftable/stack.c index ddbdf1b9c8..ed108a929b 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -92,7 +92,7 @@ static int fd_read_lines(int fd, char ***namesp) } buf = reftable_malloc(size + 1); - if (read(fd, buf, size) != size) { + if (read_in_full(fd, buf, size) != size) { err = REFTABLE_IO_ERROR; goto done; } -- cgit 1.2.3-korg From 85a8c899cedf46b79dd333549ed24b7ea7697862 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:38 +0100 Subject: reftable: handle interrupted writes There are calls to write(3P) where we don't properly handle interrupts. Convert them to use `write_in_full()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 6 +++--- reftable/stack_test.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index ed108a929b..f0cadad490 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -42,7 +42,7 @@ static void stack_filename(struct strbuf *dest, struct reftable_stack *st, static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz) { int *fdp = (int *)arg; - return write(*fdp, data, sz); + return write_in_full(*fdp, data, sz); } int reftable_new_stack(struct reftable_stack **dest, const char *dir, @@ -554,7 +554,7 @@ int reftable_addition_commit(struct reftable_addition *add) strbuf_addstr(&table_list, "\n"); } - err = write(add->lock_file_fd, table_list.buf, table_list.len); + err = write_in_full(add->lock_file_fd, table_list.buf, table_list.len); strbuf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -1024,7 +1024,7 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, strbuf_addstr(&ref_list_contents, "\n"); } - err = write(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); + err = write_in_full(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); diff --git a/reftable/stack_test.c b/reftable/stack_test.c index d0b717510f..0644c8ad2e 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -78,7 +78,7 @@ static void test_read_file(void) int i = 0; EXPECT(fd > 0); - n = write(fd, out, strlen(out)); + n = write_in_full(fd, out, strlen(out)); EXPECT(n == strlen(out)); err = close(fd); EXPECT(err >= 0); -- cgit 1.2.3-korg From 15f98b602f36ce5e5cc1f466850eaf2b37022e3b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:42 +0100 Subject: reftable/stack: verify that `reftable_stack_add()` uses auto-compaction While we have several tests that check whether we correctly perform auto-compaction when manually calling `reftable_stack_auto_compact()`, we don't have any tests that verify whether `reftable_stack_add()` does call it automatically. Add one. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack_test.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 0644c8ad2e..52b4dc3b14 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -850,6 +850,54 @@ static void test_reftable_stack_auto_compaction(void) clear_dir(dir); } +static void test_reftable_stack_add_performs_auto_compaction(void) +{ + struct reftable_write_options cfg = { 0 }; + struct reftable_stack *st = NULL; + struct strbuf refname = STRBUF_INIT; + char *dir = get_tmp_dir(__LINE__); + int err, i, n = 20; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + for (i = 0; i <= n; i++) { + struct reftable_ref_record ref = { + .update_index = reftable_stack_next_update_index(st), + .value_type = REFTABLE_REF_SYMREF, + .value.symref = "master", + }; + + /* + * Disable auto-compaction for all but the last runs. Like this + * we can ensure that we indeed honor this setting and have + * better control over when exactly auto compaction runs. + */ + st->disable_auto_compact = i != n; + + strbuf_reset(&refname); + strbuf_addf(&refname, "branch-%04d", i); + ref.refname = refname.buf; + + err = reftable_stack_add(st, &write_test_ref, &ref); + EXPECT_ERR(err); + + /* + * The stack length should grow continuously for all runs where + * auto compaction is disabled. When enabled, we should merge + * all tables in the stack. + */ + if (i != n) + EXPECT(st->merged->stack_len == i + 1); + else + EXPECT(st->merged->stack_len == 1); + } + + reftable_stack_destroy(st); + strbuf_release(&refname); + clear_dir(dir); +} + static void test_reftable_stack_compaction_concurrent(void) { struct reftable_write_options cfg = { 0 }; @@ -960,6 +1008,7 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_add); RUN_TEST(test_reftable_stack_add_one); RUN_TEST(test_reftable_stack_auto_compaction); + RUN_TEST(test_reftable_stack_add_performs_auto_compaction); RUN_TEST(test_reftable_stack_compaction_concurrent); RUN_TEST(test_reftable_stack_compaction_concurrent_clean); RUN_TEST(test_reftable_stack_hash_id); -- cgit 1.2.3-korg From 5c086453ff32a2ca07ac455594d59a9943b5d113 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:46 +0100 Subject: reftable/stack: perform auto-compaction with transactional interface Whenever updating references or reflog entries in the reftable stack, we need to add a new table to the stack, thus growing the stack's length by one. The stack can grow to become quite long rather quickly, leading to performance issues when trying to read records. But besides performance issues, this can also lead to exhaustion of file descriptors very rapidly as every single table requires a separate descriptor when opening the stack. While git-pack-refs(1) fixes this issue for us by merging the tables, it runs too irregularly to keep the length of the stack within reasonable limits. This is why the reftable stack has an auto-compaction mechanism: `reftable_stack_add()` will call `reftable_stack_auto_compact()` after its added the new table, which will auto-compact the stack as required. But while this logic works alright for `reftable_stack_add()`, we do not do the same in `reftable_addition_commit()`, which is the transactional equivalent to the former function that allows us to write multiple updates to the stack atomically. Consequentially, we will easily run into file descriptor exhaustion in code paths that use many separate transactions like e.g. non-atomic fetches. Fix this issue by calling `reftable_stack_auto_compact()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 6 ++++++ reftable/stack_test.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/reftable/stack.c b/reftable/stack.c index f0cadad490..f5d18a842a 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -584,6 +584,12 @@ int reftable_addition_commit(struct reftable_addition *add) add->new_tables_len = 0; err = reftable_stack_reload(add->stack); + if (err) + goto done; + + if (!add->stack->disable_auto_compact) + err = reftable_stack_auto_compact(add->stack); + done: reftable_addition_close(add); return err; diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 52b4dc3b14..14a3fc11ee 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -289,6 +289,61 @@ static void test_reftable_stack_transaction_api(void) clear_dir(dir); } +static void test_reftable_stack_transaction_api_performs_auto_compaction(void) +{ + char *dir = get_tmp_dir(__LINE__); + struct reftable_write_options cfg = {0}; + struct reftable_addition *add = NULL; + struct reftable_stack *st = NULL; + int i, n = 20, err; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + for (i = 0; i <= n; i++) { + struct reftable_ref_record ref = { + .update_index = reftable_stack_next_update_index(st), + .value_type = REFTABLE_REF_SYMREF, + .value.symref = "master", + }; + char name[100]; + + snprintf(name, sizeof(name), "branch%04d", i); + ref.refname = name; + + /* + * Disable auto-compaction for all but the last runs. Like this + * we can ensure that we indeed honor this setting and have + * better control over when exactly auto compaction runs. + */ + st->disable_auto_compact = i != n; + + err = reftable_stack_new_addition(&add, st); + EXPECT_ERR(err); + + err = reftable_addition_add(add, &write_test_ref, &ref); + EXPECT_ERR(err); + + err = reftable_addition_commit(add); + EXPECT_ERR(err); + + reftable_addition_destroy(add); + + /* + * The stack length should grow continuously for all runs where + * auto compaction is disabled. When enabled, we should merge + * all tables in the stack. + */ + if (i != n) + EXPECT(st->merged->stack_len == i + 1); + else + EXPECT(st->merged->stack_len == 1); + } + + reftable_stack_destroy(st); + clear_dir(dir); +} + static void test_reftable_stack_validate_refname(void) { struct reftable_write_options cfg = { 0 }; @@ -1016,6 +1071,7 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_log_normalize); RUN_TEST(test_reftable_stack_tombstone); RUN_TEST(test_reftable_stack_transaction_api); + RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); RUN_TEST(test_reftable_stack_update_index_check); RUN_TEST(test_reftable_stack_uptodate); RUN_TEST(test_reftable_stack_validate_refname); -- cgit 1.2.3-korg From d779996a1087747886853f4e73cff920c09ab1a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:50 +0100 Subject: reftable/stack: reuse buffers when reloading stack In `reftable_stack_reload_once()` we iterate over all the tables added to the stack in order to figure out whether any of the tables needs to be reloaded. We use a set of buffers in this context to compute the paths of these tables, but discard those buffers on every iteration. This is quite wasteful given that we do not need to transfer ownership of the allocated buffer outside of the loop. Refactor the code to instead reuse the buffers to reduce the number of allocations we need to do. Note that we do not have to manually reset the buffer because `stack_filename()` does this for us already. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index f5d18a842a..2dd2373360 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -204,6 +204,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, reftable_calloc(sizeof(struct reftable_table) * names_len); int new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; + struct strbuf table_path = STRBUF_INIT; int i; while (*names) { @@ -223,13 +224,10 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, if (!rd) { struct reftable_block_source src = { NULL }; - struct strbuf table_path = STRBUF_INIT; stack_filename(&table_path, st, name); err = reftable_block_source_from_file(&src, table_path.buf); - strbuf_release(&table_path); - if (err < 0) goto done; @@ -267,16 +265,13 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, for (i = 0; i < cur_len; i++) { if (cur[i]) { const char *name = reader_name(cur[i]); - struct strbuf filename = STRBUF_INIT; - stack_filename(&filename, st, name); + stack_filename(&table_path, st, name); reader_close(cur[i]); reftable_reader_free(cur[i]); /* On Windows, can only unlink after closing. */ - unlink(filename.buf); - - strbuf_release(&filename); + unlink(table_path.buf); } } @@ -288,6 +283,7 @@ done: reftable_free(new_readers); reftable_free(new_tables); reftable_free(cur); + strbuf_release(&table_path); return err; } -- cgit 1.2.3-korg From 3054fbd93edb5f12c1a320dfb6abec139bdf9628 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:54 +0100 Subject: reftable/stack: fix stale lock when dying When starting a transaction via `reftable_stack_init_addition()`, we create a lockfile for the reftable stack itself which we'll write the new list of tables to. But if we terminate abnormally e.g. via a call to `die()`, then we do not remove the lockfile. Subsequent executions of Git which try to modify references will thus fail with an out-of-date error. Fix this bug by registering the lock as a `struct tempfile`, which ensures automatic cleanup for us. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 47 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 2dd2373360..0c235724e2 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -17,6 +17,8 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-merged.h" #include "writer.h" +#include "tempfile.h" + static int stack_try_add(struct reftable_stack *st, int (*write_table)(struct reftable_writer *wr, void *arg), @@ -440,8 +442,7 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) } struct reftable_addition { - int lock_file_fd; - struct strbuf lock_file_name; + struct tempfile *lock_file; struct reftable_stack *stack; char **new_tables; @@ -449,24 +450,19 @@ struct reftable_addition { uint64_t next_update_index; }; -#define REFTABLE_ADDITION_INIT \ - { \ - .lock_file_name = STRBUF_INIT \ - } +#define REFTABLE_ADDITION_INIT {0} static int reftable_stack_init_addition(struct reftable_addition *add, struct reftable_stack *st) { + struct strbuf lock_file_name = STRBUF_INIT; int err = 0; add->stack = st; - strbuf_reset(&add->lock_file_name); - strbuf_addstr(&add->lock_file_name, st->list_file); - strbuf_addstr(&add->lock_file_name, ".lock"); + strbuf_addf(&lock_file_name, "%s.lock", st->list_file); - add->lock_file_fd = open(add->lock_file_name.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (add->lock_file_fd < 0) { + add->lock_file = create_tempfile(lock_file_name.buf); + if (!add->lock_file) { if (errno == EEXIST) { err = REFTABLE_LOCK_ERROR; } else { @@ -475,7 +471,7 @@ static int reftable_stack_init_addition(struct reftable_addition *add, goto done; } if (st->config.default_permissions) { - if (chmod(add->lock_file_name.buf, st->config.default_permissions) < 0) { + if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } @@ -495,6 +491,7 @@ done: if (err) { reftable_addition_close(add); } + strbuf_release(&lock_file_name); return err; } @@ -512,15 +509,7 @@ static void reftable_addition_close(struct reftable_addition *add) add->new_tables = NULL; add->new_tables_len = 0; - if (add->lock_file_fd > 0) { - close(add->lock_file_fd); - add->lock_file_fd = 0; - } - if (add->lock_file_name.len > 0) { - unlink(add->lock_file_name.buf); - strbuf_release(&add->lock_file_name); - } - + delete_tempfile(&add->lock_file); strbuf_release(&nm); } @@ -536,8 +525,10 @@ void reftable_addition_destroy(struct reftable_addition *add) int reftable_addition_commit(struct reftable_addition *add) { struct strbuf table_list = STRBUF_INIT; + int lock_file_fd = get_tempfile_fd(add->lock_file); int i = 0; int err = 0; + if (add->new_tables_len == 0) goto done; @@ -550,28 +541,20 @@ int reftable_addition_commit(struct reftable_addition *add) strbuf_addstr(&table_list, "\n"); } - err = write_in_full(add->lock_file_fd, table_list.buf, table_list.len); + err = write_in_full(lock_file_fd, table_list.buf, table_list.len); strbuf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = close(add->lock_file_fd); - add->lock_file_fd = 0; - if (err < 0) { - err = REFTABLE_IO_ERROR; - goto done; - } - - err = rename(add->lock_file_name.buf, add->stack->list_file); + err = rename_tempfile(&add->lock_file, add->stack->list_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } /* success, no more state to clean up. */ - strbuf_release(&add->lock_file_name); for (i = 0; i < add->new_tables_len; i++) { reftable_free(add->new_tables[i]); } -- cgit 1.2.3-korg From 9abda98149e0f164ac0532fc6ca89b337049b9c3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:59 +0100 Subject: reftable/stack: fix use of unseeded randomness When writing a new reftable stack, Git will first create the stack with a random suffix so that concurrent updates will not try to write to the same file. This random suffix is computed via a call to rand(3P). But we never seed the function via srand(3P), which means that the suffix is in fact always the same. Fix this bug by using `git_rand()` instead, which does not need to be initialized. While this function is likely going to be slower depending on the platform, this slowness should not matter in practice as we only use it when writing a new reftable stack. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 6 +++--- reftable/stack.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 469ab79a5a..278663f22d 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -141,8 +141,8 @@ static void test_log_buffer_size(void) */ uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(rand() % 256); - hash2[i] = (uint8_t)(rand() % 256); + hash1[i] = (uint8_t)(git_rand() % 256); + hash2[i] = (uint8_t)(git_rand() % 256); } log.value.update.old_hash = hash1; log.value.update.new_hash = hash2; @@ -320,7 +320,7 @@ static void test_log_zlib_corruption(void) }; for (i = 0; i < sizeof(message) - 1; i++) - message[i] = (uint8_t)(rand() % 64 + ' '); + message[i] = (uint8_t)(git_rand() % 64 + ' '); reftable_writer_set_limits(w, 1, 1); diff --git a/reftable/stack.c b/reftable/stack.c index 0c235724e2..16bab82063 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -434,7 +434,7 @@ int reftable_stack_add(struct reftable_stack *st, static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)rand(); + uint32_t rnd = (uint32_t)git_rand(); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); strbuf_reset(dest); -- cgit 1.2.3-korg From 829231dc203f777a32ebdbbaf66e7661a21ac74a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:08:03 +0100 Subject: reftable/merged: reuse buffer to compute record keys When iterating over entries in the merged iterator's queue, we compute the key of each of the entries and write it into a buffer. We do not reuse the buffer though and thus re-allocate it on every iteration, which is wasteful given that we never transfer ownership of the allocated bytes outside of the loop. Refactor the code to reuse the buffer. This also fixes a potential memory leak when `merged_iter_advance_subiter()` returns an error. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 31 ++++++++++++++++--------------- reftable/merged.h | 2 ++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 5ded470c08..556bb5c556 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -52,6 +52,8 @@ static void merged_iter_close(void *p) reftable_iterator_destroy(&mi->stack[i]); } reftable_free(mi->stack); + strbuf_release(&mi->key); + strbuf_release(&mi->entry_key); } static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, @@ -85,7 +87,6 @@ static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { - struct strbuf entry_key = STRBUF_INIT; struct pq_entry entry = { 0 }; int err = 0; @@ -105,33 +106,31 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. */ - reftable_record_key(&entry.rec, &entry_key); + reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - struct strbuf k = STRBUF_INIT; - int err = 0, cmp = 0; + int cmp = 0; - reftable_record_key(&top.rec, &k); + reftable_record_key(&top.rec, &mi->key); - cmp = strbuf_cmp(&k, &entry_key); - strbuf_release(&k); - - if (cmp > 0) { + cmp = strbuf_cmp(&mi->key, &mi->entry_key); + if (cmp > 0) break; - } merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); - if (err < 0) { - return err; - } + if (err < 0) + goto done; reftable_record_release(&top.rec); } reftable_record_copy_from(rec, &entry.rec, hash_size(mi->hash_id)); + +done: reftable_record_release(&entry.rec); - strbuf_release(&entry_key); - return 0; + strbuf_release(&mi->entry_key); + strbuf_release(&mi->key); + return err; } static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) @@ -248,6 +247,8 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, + .key = STRBUF_INIT, + .entry_key = STRBUF_INIT, }; int n = 0; int err = 0; diff --git a/reftable/merged.h b/reftable/merged.h index 7d9f95d27e..d5b39dfe7f 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -31,6 +31,8 @@ struct merged_iter { uint8_t typ; int suppress_deletions; struct merged_iter_pqueue pq; + struct strbuf key; + struct strbuf entry_key; }; void merged_table_release(struct reftable_merged_table *mt); -- cgit 1.2.3-korg From a8305bc6d8eb55cf52264964c8fc0289b72f2fd8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:08:07 +0100 Subject: reftable/block: introduce macro to initialize `struct block_iter` There are a bunch of locations where we initialize members of `struct block_iter`, which makes it harder than necessary to expand this struct to have additional members. Unify the logic via a new `BLOCK_ITER_INIT` macro that initializes all members. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 4 +--- reftable/block.h | 4 ++++ reftable/block_test.c | 4 ++-- reftable/iter.h | 8 ++++---- reftable/reader.c | 7 +++---- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 34d4d07369..8c6a8c77fc 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -389,9 +389,7 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, struct reftable_record rec = reftable_new_record(block_reader_type(br)); struct strbuf key = STRBUF_INIT; int err = 0; - struct block_iter next = { - .last_key = STRBUF_INIT, - }; + struct block_iter next = BLOCK_ITER_INIT; int i = binsearch(br->restart_count, &restart_key_less, &args); if (args.error) { diff --git a/reftable/block.h b/reftable/block.h index 87c77539b5..51699af233 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -86,6 +86,10 @@ struct block_iter { struct strbuf last_key; }; +#define BLOCK_ITER_INIT { \ + .last_key = STRBUF_INIT, \ +} + /* initializes a block reader. */ int block_reader_init(struct block_reader *br, struct reftable_block *bl, uint32_t header_off, uint32_t table_block_size, diff --git a/reftable/block_test.c b/reftable/block_test.c index cb88af4a56..c00bbc8aed 100644 --- a/reftable/block_test.c +++ b/reftable/block_test.c @@ -32,7 +32,7 @@ static void test_block_read_write(void) int i = 0; int n; struct block_reader br = { 0 }; - struct block_iter it = { .last_key = STRBUF_INIT }; + struct block_iter it = BLOCK_ITER_INIT; int j = 0; struct strbuf want = STRBUF_INIT; @@ -87,7 +87,7 @@ static void test_block_read_write(void) block_iter_close(&it); for (i = 0; i < N; i++) { - struct block_iter it = { .last_key = STRBUF_INIT }; + struct block_iter it = BLOCK_ITER_INIT; strbuf_reset(&want); strbuf_addstr(&want, names[i]); diff --git a/reftable/iter.h b/reftable/iter.h index 09eb0cbfa5..47d67d84df 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -53,10 +53,10 @@ struct indexed_table_ref_iter { int is_finished; }; -#define INDEXED_TABLE_REF_ITER_INIT \ - { \ - .cur = { .last_key = STRBUF_INIT }, .oid = STRBUF_INIT, \ - } +#define INDEXED_TABLE_REF_ITER_INIT { \ + .cur = BLOCK_ITER_INIT, \ + .oid = STRBUF_INIT, \ +} void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, struct indexed_table_ref_iter *itr); diff --git a/reftable/reader.c b/reftable/reader.c index b4db23ce18..9de64f50b4 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -224,10 +224,9 @@ struct table_iter { struct block_iter bi; int is_finished; }; -#define TABLE_ITER_INIT \ - { \ - .bi = {.last_key = STRBUF_INIT } \ - } +#define TABLE_ITER_INIT { \ + .bi = BLOCK_ITER_INIT \ +} static void table_iter_copy_from(struct table_iter *dest, struct table_iter *src) -- cgit 1.2.3-korg From c0cadb0576d4920915eb3bd38a7d1abfcbd25f98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:08:12 +0100 Subject: reftable/block: reuse buffer to compute record keys When iterating over entries in the block iterator we compute the key of each of the entries and write it into a buffer. We do not reuse the buffer though and thus re-allocate it on every iteration, which is wasteful. Refactor the code to reuse the buffer. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 19 ++++++++----------- reftable/block.h | 2 ++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 8c6a8c77fc..1df3d8a0f0 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -323,30 +323,28 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) .len = it->br->block_len - it->next_off, }; struct string_view start = in; - struct strbuf key = STRBUF_INIT; uint8_t extra = 0; int n = 0; if (it->next_off >= it->br->block_len) return 1; - n = reftable_decode_key(&key, &extra, it->last_key, in); + n = reftable_decode_key(&it->key, &extra, it->last_key, in); if (n < 0) return -1; - if (!key.len) + if (!it->key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size); if (n < 0) return -1; string_view_consume(&in, n); strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &key); + strbuf_addbuf(&it->last_key, &it->key); it->next_off += start.len - in.len; - strbuf_release(&key); return 0; } @@ -377,6 +375,7 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want) void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); + strbuf_release(&it->key); } int block_reader_seek(struct block_reader *br, struct block_iter *it, @@ -387,7 +386,6 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, .r = br, }; struct reftable_record rec = reftable_new_record(block_reader_type(br)); - struct strbuf key = STRBUF_INIT; int err = 0; struct block_iter next = BLOCK_ITER_INIT; @@ -414,8 +412,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, if (err < 0) goto done; - reftable_record_key(&rec, &key); - if (err > 0 || strbuf_cmp(&key, want) >= 0) { + reftable_record_key(&rec, &it->key); + if (err > 0 || strbuf_cmp(&it->key, want) >= 0) { err = 0; goto done; } @@ -424,8 +422,7 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, } done: - strbuf_release(&key); - strbuf_release(&next.last_key); + block_iter_close(&next); reftable_record_release(&rec); return err; diff --git a/reftable/block.h b/reftable/block.h index 51699af233..17481e6331 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -84,10 +84,12 @@ struct block_iter { /* key for last entry we read. */ struct strbuf last_key; + struct strbuf key; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ + .key = STRBUF_INIT, \ } /* initializes a block reader. */ -- cgit 1.2.3-korg