From f6b58c1be40ba4bd6e7f2364acfe5fa34ce04120 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:23 +0100 Subject: reftable: introduce macros to grow arrays Throughout the reftable library we have many cases where we need to grow arrays. In order to avoid too many reallocations, we roughly double the capacity of the array on each iteration. The resulting code pattern is duplicated across many sites. We have similar patterns in our main codebase, which is why we have eventually introduced an `ALLOC_GROW()` macro to abstract it away and avoid some code duplication. We cannot easily reuse this macro here though because `ALLOC_GROW()` uses `REALLOC_ARRAY()`, which in turn will call realloc(3P) to grow the array. The reftable code is structured as a library though (even if the boundaries are fuzzy), and one property this brings with it is that it is possible to plug in your own allocators. So instead of using realloc(3P), we need to use `reftable_realloc()` that knows to use the user-provided implementation. So let's introduce two new macros `REFTABLE_REALLOC_ARRAY()` and `REFTABLE_ALLOC_GROW()` that mirror what we do in our main codebase, with two modifications: - They use `reftable_realloc()`, as explained above. - They use a different growth factor of `2 * cap + 1` instead of `(cap + 16) * 3 / 2`. The second change is because we know a bit more about the allocation patterns in the reftable library. In most cases, we end up only having a handful of items in the array and don't end up growing them. The initial capacity that our normal growth factor uses (which is 24) would thus end up over-allocating in a lot of code paths. This effect is measurable: - Before change: HEAP SUMMARY: in use at exit: 671,983 bytes in 152 blocks total heap usage: 3,843,446 allocs, 3,843,294 frees, 223,761,402 bytes allocated - After change with a growth factor of `(2 * alloc + 1)`: HEAP SUMMARY: in use at exit: 671,983 bytes in 152 blocks total heap usage: 3,843,446 allocs, 3,843,294 frees, 223,761,410 bytes allocated - After change with a growth factor of `(alloc + 16)* 2 / 3`: HEAP SUMMARY: in use at exit: 671,983 bytes in 152 blocks total heap usage: 3,833,673 allocs, 3,833,521 frees, 4,728,251,742 bytes allocated While the total heap usage is roughly the same, we do end up allocating significantly more bytes with our usual growth factor (in fact, roughly 21 times as many). Convert the reftable library to use these new macros. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.c | 8 ++------ reftable/basics.h | 11 +++++++++++ reftable/block.c | 7 +------ reftable/merged_test.c | 20 ++++++-------------- reftable/pq.c | 8 ++------ reftable/stack.c | 29 ++++++++++++----------------- reftable/writer.c | 14 ++------------ 7 files changed, 36 insertions(+), 61 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index f761e48028..af9004cec2 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -89,17 +89,13 @@ void parse_names(char *buf, int size, char ***namesp) next = end; } if (p < next) { - if (names_len == names_cap) { - names_cap = 2 * names_cap + 1; - names = reftable_realloc( - names, names_cap * sizeof(*names)); - } + REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap); names[names_len++] = xstrdup(p); } p = next + 1; } - names = reftable_realloc(names, (names_len + 1) * sizeof(*names)); + REFTABLE_REALLOC_ARRAY(names, names_len + 1); names[names_len] = NULL; *namesp = names; } diff --git a/reftable/basics.h b/reftable/basics.h index 096b36862b..2f855cd724 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -53,6 +53,17 @@ void *reftable_realloc(void *p, size_t sz); void reftable_free(void *p); void *reftable_calloc(size_t sz); +#define REFTABLE_REALLOC_ARRAY(x, alloc) (x) = reftable_realloc((x), st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_ALLOC_GROW(x, nr, alloc) \ + do { \ + if ((nr) > alloc) { \ + alloc = 2 * (alloc) + 1; \ + if (alloc < (nr)) \ + alloc = (nr); \ + REFTABLE_REALLOC_ARRAY(x, alloc); \ + } \ + } while (0) + /* Find the longest shared prefix size of `a` and `b` */ struct strbuf; int common_prefix_size(struct strbuf *a, struct strbuf *b); diff --git a/reftable/block.c b/reftable/block.c index 1df3d8a0f0..6952d0facf 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -51,12 +51,7 @@ static int block_writer_register_restart(struct block_writer *w, int n, if (2 + 3 * rlen + n > w->block_size - w->next) return -1; if (is_restart) { - if (w->restart_len == w->restart_cap) { - w->restart_cap = w->restart_cap * 2 + 1; - w->restarts = reftable_realloc( - w->restarts, sizeof(uint32_t) * w->restart_cap); - } - + REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap); w->restarts[w->restart_len++] = w->next; } diff --git a/reftable/merged_test.c b/reftable/merged_test.c index 46908f738f..e05351e035 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -231,14 +231,10 @@ static void test_merged(void) while (len < 100) { /* cap loops/recursion. */ struct reftable_ref_record ref = { NULL }; int err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { + if (err > 0) break; - } - if (len == cap) { - cap = 2 * cap + 1; - out = reftable_realloc( - out, sizeof(struct reftable_ref_record) * cap); - } + + REFTABLE_ALLOC_GROW(out, len + 1, cap); out[len++] = ref; } reftable_iterator_destroy(&it); @@ -368,14 +364,10 @@ static void test_merged_logs(void) while (len < 100) { /* cap loops/recursion. */ struct reftable_log_record log = { NULL }; int err = reftable_iterator_next_log(&it, &log); - if (err > 0) { + if (err > 0) break; - } - if (len == cap) { - cap = 2 * cap + 1; - out = reftable_realloc( - out, sizeof(struct reftable_log_record) * cap); - } + + REFTABLE_ALLOC_GROW(out, len + 1, cap); out[len++] = log; } reftable_iterator_destroy(&it); diff --git a/reftable/pq.c b/reftable/pq.c index dcefeb793a..2461daf5ff 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -75,13 +75,9 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry { int i = 0; - if (pq->len == pq->cap) { - pq->cap = 2 * pq->cap + 1; - pq->heap = reftable_realloc(pq->heap, - pq->cap * sizeof(struct pq_entry)); - } - + REFTABLE_ALLOC_GROW(pq->heap, pq->len + 1, pq->cap); pq->heap[pq->len++] = *e; + i = pq->len - 1; while (i > 0) { int j = (i - 1) / 2; diff --git a/reftable/stack.c b/reftable/stack.c index bf3869ce70..1dfab99e96 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -551,7 +551,7 @@ struct reftable_addition { struct reftable_stack *stack; char **new_tables; - int new_tables_len; + size_t new_tables_len, new_tables_cap; uint64_t next_update_index; }; @@ -602,8 +602,9 @@ done: static void reftable_addition_close(struct reftable_addition *add) { - int i = 0; struct strbuf nm = STRBUF_INIT; + size_t i; + for (i = 0; i < add->new_tables_len; i++) { stack_filename(&nm, add->stack, add->new_tables[i]); unlink(nm.buf); @@ -613,6 +614,7 @@ static void reftable_addition_close(struct reftable_addition *add) reftable_free(add->new_tables); add->new_tables = NULL; add->new_tables_len = 0; + add->new_tables_cap = 0; delete_tempfile(&add->lock_file); strbuf_release(&nm); @@ -631,8 +633,8 @@ int reftable_addition_commit(struct reftable_addition *add) { struct strbuf table_list = STRBUF_INIT; int lock_file_fd = get_tempfile_fd(add->lock_file); - int i = 0; int err = 0; + size_t i; if (add->new_tables_len == 0) goto done; @@ -660,12 +662,12 @@ int reftable_addition_commit(struct reftable_addition *add) } /* success, no more state to clean up. */ - for (i = 0; i < add->new_tables_len; i++) { + for (i = 0; i < add->new_tables_len; i++) reftable_free(add->new_tables[i]); - } reftable_free(add->new_tables); add->new_tables = NULL; add->new_tables_len = 0; + add->new_tables_cap = 0; err = reftable_stack_reload_maybe_reuse(add->stack, 1); if (err) @@ -792,11 +794,9 @@ int reftable_addition_add(struct reftable_addition *add, goto done; } - add->new_tables = reftable_realloc(add->new_tables, - sizeof(*add->new_tables) * - (add->new_tables_len + 1)); - add->new_tables[add->new_tables_len] = strbuf_detach(&next_name, NULL); - add->new_tables_len++; + REFTABLE_ALLOC_GROW(add->new_tables, add->new_tables_len + 1, + add->new_tables_cap); + add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL); done: if (tab_fd > 0) { close(tab_fd); @@ -1367,17 +1367,12 @@ static int stack_check_addition(struct reftable_stack *st, while (1) { struct reftable_ref_record ref = { NULL }; err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { + if (err > 0) break; - } if (err < 0) goto done; - if (len >= cap) { - cap = 2 * cap + 1; - refs = reftable_realloc(refs, cap * sizeof(refs[0])); - } - + REFTABLE_ALLOC_GROW(refs, len + 1, cap); refs[len++] = ref; } diff --git a/reftable/writer.c b/reftable/writer.c index ee4590e20f..4483bb21c3 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -200,12 +200,7 @@ static void writer_index_hash(struct reftable_writer *w, struct strbuf *hash) return; } - if (key->offset_len == key->offset_cap) { - key->offset_cap = 2 * key->offset_cap + 1; - key->offsets = reftable_realloc( - key->offsets, sizeof(uint64_t) * key->offset_cap); - } - + REFTABLE_ALLOC_GROW(key->offsets, key->offset_len + 1, key->offset_cap); key->offsets[key->offset_len++] = off; } @@ -674,12 +669,7 @@ static int writer_flush_nonempty_block(struct reftable_writer *w) if (err < 0) return err; - if (w->index_cap == w->index_len) { - w->index_cap = 2 * w->index_cap + 1; - w->index = reftable_realloc( - w->index, - sizeof(struct reftable_index_record) * w->index_cap); - } + REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); ir.offset = w->next; strbuf_reset(&ir.last_key); -- cgit 1.2.3-korg From b4ff12c8eefff9cba73ba3cb7492111adfa31d87 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:27 +0100 Subject: reftable: introduce macros to allocate arrays Similar to the preceding commit, let's carry over macros to allocate arrays with `REFTABLE_ALLOC_ARRAY()` and `REFTABLE_CALLOC_ARRAY()`. This requires us to change the signature of `reftable_calloc()`, which only takes a single argument right now and thus puts the burden on the caller to calculate the final array's size. This is a net improvement though as it means that we can now provide proper overflow checks when multiplying the array size with the member size. Convert callsites of `reftable_calloc()` to the new signature and start using the new macros where possible. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.h | 4 +++- reftable/block.c | 10 ++++++---- reftable/block_test.c | 2 +- reftable/blocksource.c | 4 ++-- reftable/iter.c | 3 +-- reftable/merged.c | 4 ++-- reftable/merged_test.c | 22 +++++++++++++--------- reftable/publicbasics.c | 3 ++- reftable/reader.c | 8 +++----- reftable/readwrite_test.c | 8 +++++--- reftable/record.c | 14 ++++++++------ reftable/record_test.c | 4 ++-- reftable/refname.c | 4 ++-- reftable/stack.c | 28 +++++++++++++--------------- reftable/tree.c | 4 ++-- reftable/writer.c | 7 +++---- 16 files changed, 68 insertions(+), 61 deletions(-) diff --git a/reftable/basics.h b/reftable/basics.h index 2f855cd724..4c3ac963a3 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -51,8 +51,10 @@ int names_length(char **names); void *reftable_malloc(size_t sz); void *reftable_realloc(void *p, size_t sz); void reftable_free(void *p); -void *reftable_calloc(size_t sz); +void *reftable_calloc(size_t nelem, size_t elsize); +#define REFTABLE_ALLOC_ARRAY(x, alloc) (x) = reftable_malloc(st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_CALLOC_ARRAY(x, alloc) (x) = reftable_calloc((alloc), sizeof(*(x))) #define REFTABLE_REALLOC_ARRAY(x, alloc) (x) = reftable_realloc((x), st_mult(sizeof(*(x)), (alloc))) #define REFTABLE_ALLOC_GROW(x, nr, alloc) \ do { \ diff --git a/reftable/block.c b/reftable/block.c index 6952d0facf..838759823a 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -143,8 +143,10 @@ int block_writer_finish(struct block_writer *w) int block_header_skip = 4 + w->header_off; uLongf src_len = w->next - block_header_skip; uLongf dest_cap = src_len * 1.001 + 12; + uint8_t *compressed; + + REFTABLE_ALLOC_ARRAY(compressed, dest_cap); - uint8_t *compressed = reftable_malloc(dest_cap); while (1) { uLongf out_dest_len = dest_cap; int zresult = compress2(compressed, &out_dest_len, @@ -201,9 +203,9 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, uLongf dst_len = sz - block_header_skip; /* total size of dest buffer. */ uLongf src_len = block->len - block_header_skip; - /* Log blocks specify the *uncompressed* size in their header. - */ - uncompressed = reftable_malloc(sz); + + /* Log blocks specify the *uncompressed* size in their header. */ + REFTABLE_ALLOC_ARRAY(uncompressed, sz); /* Copy over the block header verbatim. It's not compressed. */ memcpy(uncompressed, block->data, block_header_skip); diff --git a/reftable/block_test.c b/reftable/block_test.c index dedb05c7d8..e162c6e33f 100644 --- a/reftable/block_test.c +++ b/reftable/block_test.c @@ -36,7 +36,7 @@ static void test_block_read_write(void) int j = 0; struct strbuf want = STRBUF_INIT; - block.data = reftable_calloc(block_size); + REFTABLE_CALLOC_ARRAY(block.data, block_size); block.len = block_size; block.source = malloc_block_source(); block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size, diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 8c41e3c70f..eeed254ba9 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -29,7 +29,7 @@ static int strbuf_read_block(void *v, struct reftable_block *dest, uint64_t off, { struct strbuf *b = v; assert(off + size <= b->len); - dest->data = reftable_calloc(size); + REFTABLE_CALLOC_ARRAY(dest->data, size); memcpy(dest->data, b->buf + off, size); dest->len = size; return size; @@ -132,7 +132,7 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, return REFTABLE_IO_ERROR; } - p = reftable_calloc(sizeof(*p)); + REFTABLE_CALLOC_ARRAY(p, 1); p->size = st.st_size; p->data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); diff --git a/reftable/iter.c b/reftable/iter.c index a8d174c040..8b5ebf6183 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -160,8 +160,7 @@ int new_indexed_table_ref_iter(struct indexed_table_ref_iter **dest, int oid_len, uint64_t *offsets, int offset_len) { struct indexed_table_ref_iter empty = INDEXED_TABLE_REF_ITER_INIT; - struct indexed_table_ref_iter *itr = - reftable_calloc(sizeof(struct indexed_table_ref_iter)); + struct indexed_table_ref_iter *itr = reftable_calloc(1, sizeof(*itr)); int err = 0; *itr = empty; diff --git a/reftable/merged.c b/reftable/merged.c index c258ce953e..2031fd51b4 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -190,7 +190,7 @@ int reftable_new_merged_table(struct reftable_merged_table **dest, } } - m = reftable_calloc(sizeof(struct reftable_merged_table)); + REFTABLE_CALLOC_ARRAY(m, 1); m->stack = stack; m->stack_len = n; m->min = first_min; @@ -240,7 +240,7 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, struct reftable_record *rec) { struct reftable_iterator *iters = reftable_calloc( - sizeof(struct reftable_iterator) * mt->stack_len); + mt->stack_len, sizeof(*iters)); struct merged_iter merged = { .stack = iters, .typ = reftable_record_type(rec), diff --git a/reftable/merged_test.c b/reftable/merged_test.c index e05351e035..e233a9d581 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -93,10 +93,12 @@ merged_table_from_records(struct reftable_ref_record **refs, int i = 0; struct reftable_merged_table *mt = NULL; int err; - struct reftable_table *tabs = - reftable_calloc(n * sizeof(struct reftable_table)); - *readers = reftable_calloc(n * sizeof(struct reftable_reader *)); - *source = reftable_calloc(n * sizeof(**source)); + struct reftable_table *tabs; + + REFTABLE_CALLOC_ARRAY(tabs, n); + REFTABLE_CALLOC_ARRAY(*readers, n); + REFTABLE_CALLOC_ARRAY(*source, n); + for (i = 0; i < n; i++) { write_test_table(&buf[i], refs[i], sizes[i]); block_source_from_strbuf(&(*source)[i], &buf[i]); @@ -266,10 +268,12 @@ merged_table_from_log_records(struct reftable_log_record **logs, int i = 0; struct reftable_merged_table *mt = NULL; int err; - struct reftable_table *tabs = - reftable_calloc(n * sizeof(struct reftable_table)); - *readers = reftable_calloc(n * sizeof(struct reftable_reader *)); - *source = reftable_calloc(n * sizeof(**source)); + struct reftable_table *tabs; + + REFTABLE_CALLOC_ARRAY(tabs, n); + REFTABLE_CALLOC_ARRAY(*readers, n); + REFTABLE_CALLOC_ARRAY(*source, n); + for (i = 0; i < n; i++) { write_test_log_table(&buf[i], logs[i], sizes[i], i + 1); block_source_from_strbuf(&(*source)[i], &buf[i]); @@ -412,7 +416,7 @@ static void test_default_write_opts(void) }; int err; struct reftable_block_source source = { NULL }; - struct reftable_table *tab = reftable_calloc(sizeof(*tab) * 1); + struct reftable_table *tab = reftable_calloc(1, sizeof(*tab)); uint32_t hash_id; struct reftable_reader *rd = NULL; struct reftable_merged_table *merged = NULL; diff --git a/reftable/publicbasics.c b/reftable/publicbasics.c index bcb82530d6..44b84a125e 100644 --- a/reftable/publicbasics.c +++ b/reftable/publicbasics.c @@ -37,8 +37,9 @@ void reftable_free(void *p) free(p); } -void *reftable_calloc(size_t sz) +void *reftable_calloc(size_t nelem, size_t elsize) { + size_t sz = st_mult(nelem, elsize); void *p = reftable_malloc(sz); memset(p, 0, sz); return p; diff --git a/reftable/reader.c b/reftable/reader.c index 64dc366fb1..3e0de5e8ad 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -539,8 +539,7 @@ static int reader_seek_indexed(struct reftable_reader *r, if (err == 0) { struct table_iter empty = TABLE_ITER_INIT; - struct table_iter *malloced = - reftable_calloc(sizeof(struct table_iter)); + struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced)); *malloced = empty; table_iter_copy_from(malloced, &next); iterator_from_table_iter(it, malloced); @@ -635,8 +634,7 @@ void reader_close(struct reftable_reader *r) int reftable_new_reader(struct reftable_reader **p, struct reftable_block_source *src, char const *name) { - struct reftable_reader *rd = - reftable_calloc(sizeof(struct reftable_reader)); + struct reftable_reader *rd = reftable_calloc(1, sizeof(*rd)); int err = init_reader(rd, src, name); if (err == 0) { *p = rd; @@ -711,7 +709,7 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, uint8_t *oid) { struct table_iter ti_empty = TABLE_ITER_INIT; - struct table_iter *ti = reftable_calloc(sizeof(struct table_iter)); + struct table_iter *ti = reftable_calloc(1, sizeof(*ti)); struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; int oid_len = hash_size(r->hash_id); diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index b8a3224016..91ea7a10ef 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -56,7 +56,9 @@ static void write_table(char ***names, struct strbuf *buf, int N, int i = 0, n; struct reftable_log_record log = { NULL }; const struct reftable_stats *stats = NULL; - *names = reftable_calloc(sizeof(char *) * (N + 1)); + + REFTABLE_CALLOC_ARRAY(*names, N + 1); + reftable_writer_set_limits(w, update_index, update_index); for (i = 0; i < N; i++) { char name[100]; @@ -188,7 +190,7 @@ static void test_log_overflow(void) static void test_log_write_read(void) { int N = 2; - char **names = reftable_calloc(sizeof(char *) * (N + 1)); + char **names = reftable_calloc(N + 1, sizeof(*names)); int err; struct reftable_write_options opts = { .block_size = 256, @@ -519,7 +521,7 @@ static void test_table_read_write_seek_index(void) static void test_table_refs_for(int indexed) { int N = 50; - char **want_names = reftable_calloc(sizeof(char *) * (N + 1)); + char **want_names = reftable_calloc(N + 1, sizeof(*want_names)); int want_names_len = 0; uint8_t want_hash[GIT_SHA1_RAWSZ]; diff --git a/reftable/record.c b/reftable/record.c index 5c3fbb7b2a..2f3cd6b62c 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -497,12 +497,13 @@ static void reftable_obj_record_copy_from(void *rec, const void *src_rec, (const struct reftable_obj_record *)src_rec; reftable_obj_record_release(obj); - obj->hash_prefix = reftable_malloc(src->hash_prefix_len); + + REFTABLE_ALLOC_ARRAY(obj->hash_prefix, src->hash_prefix_len); obj->hash_prefix_len = src->hash_prefix_len; if (src->hash_prefix_len) memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len); - obj->offsets = reftable_malloc(src->offset_len * sizeof(uint64_t)); + REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); obj->offset_len = src->offset_len; COPY_ARRAY(obj->offsets, src->offsets, src->offset_len); } @@ -559,7 +560,8 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, int n = 0; uint64_t last; int j; - r->hash_prefix = reftable_malloc(key.len); + + REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len); memcpy(r->hash_prefix, key.buf, key.len); r->hash_prefix_len = key.len; @@ -577,7 +579,7 @@ static int reftable_obj_record_decode(void *rec, struct strbuf key, if (count == 0) return start.len - in.len; - r->offsets = reftable_malloc(count * sizeof(uint64_t)); + REFTABLE_ALLOC_ARRAY(r->offsets, count); r->offset_len = count; n = get_var_int(&r->offsets[0], &in); @@ -715,12 +717,12 @@ static void reftable_log_record_copy_from(void *rec, const void *src_rec, } if (dst->value.update.new_hash) { - dst->value.update.new_hash = reftable_malloc(hash_size); + REFTABLE_ALLOC_ARRAY(dst->value.update.new_hash, hash_size); memcpy(dst->value.update.new_hash, src->value.update.new_hash, hash_size); } if (dst->value.update.old_hash) { - dst->value.update.old_hash = reftable_malloc(hash_size); + REFTABLE_ALLOC_ARRAY(dst->value.update.old_hash, hash_size); memcpy(dst->value.update.old_hash, src->value.update.old_hash, hash_size); } diff --git a/reftable/record_test.c b/reftable/record_test.c index 2876db7d27..999366ad46 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -231,8 +231,8 @@ static void test_reftable_log_record_roundtrip(void) .value_type = REFTABLE_LOG_UPDATE, .value = { .update = { - .new_hash = reftable_calloc(GIT_SHA1_RAWSZ), - .old_hash = reftable_calloc(GIT_SHA1_RAWSZ), + .new_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), + .old_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1), .name = xstrdup("old name"), .email = xstrdup("old@email"), .message = xstrdup("old message"), diff --git a/reftable/refname.c b/reftable/refname.c index 9573496932..7570e4acf9 100644 --- a/reftable/refname.c +++ b/reftable/refname.c @@ -140,8 +140,8 @@ int validate_ref_record_addition(struct reftable_table tab, { struct modification mod = { .tab = tab, - .add = reftable_calloc(sizeof(char *) * sz), - .del = reftable_calloc(sizeof(char *) * sz), + .add = reftable_calloc(sz, sizeof(*mod.add)), + .del = reftable_calloc(sz, sizeof(*mod.del)), }; int i = 0; int err = 0; diff --git a/reftable/stack.c b/reftable/stack.c index 1dfab99e96..a7b2c61026 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -50,8 +50,7 @@ static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz) int reftable_new_stack(struct reftable_stack **dest, const char *dir, struct reftable_write_options config) { - struct reftable_stack *p = - reftable_calloc(sizeof(struct reftable_stack)); + struct reftable_stack *p = reftable_calloc(1, sizeof(*p)); struct strbuf list_file_name = STRBUF_INIT; int err = 0; @@ -94,7 +93,7 @@ static int fd_read_lines(int fd, char ***namesp) goto done; } - buf = reftable_malloc(size + 1); + REFTABLE_ALLOC_ARRAY(buf, size + 1); if (read_in_full(fd, buf, size) != size) { err = REFTABLE_IO_ERROR; goto done; @@ -114,7 +113,7 @@ int read_lines(const char *filename, char ***namesp) int err = 0; if (fd < 0) { if (errno == ENOENT) { - *namesp = reftable_calloc(sizeof(char *)); + REFTABLE_CALLOC_ARRAY(*namesp, 1); return 0; } @@ -191,8 +190,7 @@ void reftable_stack_destroy(struct reftable_stack *st) static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, int cur_len) { - struct reftable_reader **cur = - reftable_calloc(sizeof(struct reftable_reader *) * cur_len); + struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur)); int i = 0; for (i = 0; i < cur_len; i++) { cur[i] = st->readers[i]; @@ -208,9 +206,9 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, int err = 0; int names_len = names_length(names); struct reftable_reader **new_readers = - reftable_calloc(sizeof(struct reftable_reader *) * names_len); + reftable_calloc(names_len, sizeof(*new_readers)); struct reftable_table *new_tables = - reftable_calloc(sizeof(struct reftable_table) * names_len); + reftable_calloc(names_len, sizeof(*new_tables)); int new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; struct strbuf table_path = STRBUF_INIT; @@ -344,7 +342,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, goto out; } - names = reftable_calloc(sizeof(char *)); + REFTABLE_CALLOC_ARRAY(names, 1); } else { err = fd_read_lines(fd, &names); if (err < 0) @@ -686,7 +684,7 @@ int reftable_stack_new_addition(struct reftable_addition **dest, { int err = 0; struct reftable_addition empty = REFTABLE_ADDITION_INIT; - *dest = reftable_calloc(sizeof(**dest)); + REFTABLE_CALLOC_ARRAY(*dest, 1); **dest = empty; err = reftable_stack_init_addition(*dest, st); if (err) { @@ -871,7 +869,7 @@ static int stack_write_compact(struct reftable_stack *st, { int subtabs_len = last - first + 1; struct reftable_table *subtabs = reftable_calloc( - sizeof(struct reftable_table) * (last - first + 1)); + last - first + 1, sizeof(*subtabs)); struct reftable_merged_table *mt = NULL; int err = 0; struct reftable_iterator it = { NULL }; @@ -979,9 +977,9 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, int compact_count = last - first + 1; char **listp = NULL; char **delete_on_success = - reftable_calloc(sizeof(char *) * (compact_count + 1)); + reftable_calloc(compact_count + 1, sizeof(*delete_on_success)); char **subtable_locks = - reftable_calloc(sizeof(char *) * (compact_count + 1)); + reftable_calloc(compact_count + 1, sizeof(*subtable_locks)); int i = 0; int j = 0; int is_empty_table = 0; @@ -1204,7 +1202,7 @@ int fastlog2(uint64_t sz) struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n) { - struct segment *segs = reftable_calloc(sizeof(struct segment) * n); + struct segment *segs = reftable_calloc(n, sizeof(*segs)); int next = 0; struct segment cur = { 0 }; int i = 0; @@ -1268,7 +1266,7 @@ struct segment suggest_compaction_segment(uint64_t *sizes, int n) static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) { uint64_t *sizes = - reftable_calloc(sizeof(uint64_t) * st->merged->stack_len); + reftable_calloc(st->merged->stack_len, sizeof(*sizes)); int version = (st->config.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2; int overhead = header_size(version) - 1; int i = 0; diff --git a/reftable/tree.c b/reftable/tree.c index a5bf880985..528f33ae38 100644 --- a/reftable/tree.c +++ b/reftable/tree.c @@ -20,8 +20,8 @@ struct tree_node *tree_search(void *key, struct tree_node **rootp, if (!insert) { return NULL; } else { - struct tree_node *n = - reftable_calloc(sizeof(struct tree_node)); + struct tree_node *n; + REFTABLE_CALLOC_ARRAY(n, 1); n->key = key; *rootp = n; return *rootp; diff --git a/reftable/writer.c b/reftable/writer.c index 4483bb21c3..47b3448132 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -49,7 +49,7 @@ static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, { int n = 0; if (w->pending_padding > 0) { - uint8_t *zeroed = reftable_calloc(w->pending_padding); + uint8_t *zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed)); int n = w->write(w->write_arg, zeroed, w->pending_padding); if (n < 0) return n; @@ -123,8 +123,7 @@ struct reftable_writer * reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), void *writer_arg, struct reftable_write_options *opts) { - struct reftable_writer *wp = - reftable_calloc(sizeof(struct reftable_writer)); + struct reftable_writer *wp = reftable_calloc(1, sizeof(*wp)); strbuf_init(&wp->block_writer_data.last_key, 0); options_set_defaults(opts); if (opts->block_size >= (1 << 24)) { @@ -132,7 +131,7 @@ reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), abort(); } wp->last_key = reftable_empty_strbuf; - wp->block = reftable_calloc(opts->block_size); + REFTABLE_CALLOC_ARRAY(wp->block, opts->block_size); wp->write = writer_func; wp->write_arg = writer_arg; wp->opts = *opts; -- cgit 1.2.3-korg From ca63af0a248d31bf917d207722b284da41ffcee7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:31 +0100 Subject: reftable/stack: fix parameter validation when compacting range The `stack_compact_range()` function receives a "first" and "last" index that indicates which tables of the reftable stack should be compacted. Naturally, "first" must be smaller than "last" in order to identify a proper range of tables to compress, which we indeed also assert in the function. But the validations happens after we have already allocated arrays with a size of `last - first + 1`, leading to an underflow and thus an invalid allocation size. Fix this by reordering the array allocations to happen after we have validated parameters. While at it, convert the array allocations to use the newly introduced macros. Note that the relevant variables pointing into arrays should also be converted to use `size_t` instead of `int`. This is left for a later commit in this series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index a7b2c61026..1de2f6751c 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -966,6 +966,7 @@ done: static int stack_compact_range(struct reftable_stack *st, int first, int last, struct reftable_log_expiry_config *expiry) { + char **delete_on_success = NULL, **subtable_locks = NULL, **listp = NULL; struct strbuf temp_tab_file_name = STRBUF_INIT; struct strbuf new_table_name = STRBUF_INIT; struct strbuf lock_file_name = STRBUF_INIT; @@ -974,12 +975,7 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, int err = 0; int have_lock = 0; int lock_file_fd = -1; - int compact_count = last - first + 1; - char **listp = NULL; - char **delete_on_success = - reftable_calloc(compact_count + 1, sizeof(*delete_on_success)); - char **subtable_locks = - reftable_calloc(compact_count + 1, sizeof(*subtable_locks)); + int compact_count; int i = 0; int j = 0; int is_empty_table = 0; @@ -989,6 +985,10 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, goto done; } + compact_count = last - first + 1; + REFTABLE_CALLOC_ARRAY(delete_on_success, compact_count + 1); + REFTABLE_CALLOC_ARRAY(subtable_locks, compact_count + 1); + st->stats.attempts++; strbuf_reset(&lock_file_name); @@ -1146,12 +1146,14 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, done: free_names(delete_on_success); - listp = subtable_locks; - while (*listp) { - unlink(*listp); - listp++; + if (subtable_locks) { + listp = subtable_locks; + while (*listp) { + unlink(*listp); + listp++; + } + free_names(subtable_locks); } - free_names(subtable_locks); if (lock_file_fd >= 0) { close(lock_file_fd); lock_file_fd = -1; -- cgit 1.2.3-korg From 6d5e80fba2397708671cee6d9c5e394c4c187659 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:35 +0100 Subject: reftable/stack: index segments with `size_t` We use `int`s to index into arrays of segments and track the length of them, which is considered to be a code smell in the Git project. Convert the code to use `size_t` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 25 +++++++++++-------------- reftable/stack.h | 6 +++--- reftable/stack_test.c | 7 +++---- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 1de2f6751c..5da4ea8141 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1202,12 +1202,11 @@ int fastlog2(uint64_t sz) return l - 1; } -struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n) +struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n) { struct segment *segs = reftable_calloc(n, sizeof(*segs)); - int next = 0; struct segment cur = { 0 }; - int i = 0; + size_t next = 0, i; if (n == 0) { *seglen = 0; @@ -1233,29 +1232,27 @@ struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n) return segs; } -struct segment suggest_compaction_segment(uint64_t *sizes, int n) +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n) { - int seglen = 0; - struct segment *segs = sizes_to_segments(&seglen, sizes, n); struct segment min_seg = { .log = 64, }; - int i = 0; + struct segment *segs; + size_t seglen = 0, i; + + segs = sizes_to_segments(&seglen, sizes, n); for (i = 0; i < seglen; i++) { - if (segment_size(&segs[i]) == 1) { + if (segment_size(&segs[i]) == 1) continue; - } - if (segs[i].log < min_seg.log) { + if (segs[i].log < min_seg.log) min_seg = segs[i]; - } } while (min_seg.start > 0) { - int prev = min_seg.start - 1; - if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) { + size_t prev = min_seg.start - 1; + if (fastlog2(min_seg.bytes) < fastlog2(sizes[prev])) break; - } min_seg.start = prev; min_seg.bytes += sizes[prev]; diff --git a/reftable/stack.h b/reftable/stack.h index c1e3efa899..d919455669 100644 --- a/reftable/stack.h +++ b/reftable/stack.h @@ -32,13 +32,13 @@ struct reftable_stack { int read_lines(const char *filename, char ***lines); struct segment { - int start, end; + size_t start, end; int log; uint64_t bytes; }; int fastlog2(uint64_t sz); -struct segment *sizes_to_segments(int *seglen, uint64_t *sizes, int n); -struct segment suggest_compaction_segment(uint64_t *sizes, int n); +struct segment *sizes_to_segments(size_t *seglen, uint64_t *sizes, size_t n); +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n); #endif diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 289e902146..2d5b24e5c5 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -711,7 +711,7 @@ static void test_sizes_to_segments(void) uint64_t sizes[] = { 2, 3, 4, 5, 7, 9 }; /* .................0 1 2 3 4 5 */ - int seglen = 0; + size_t seglen = 0; struct segment *segs = sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); EXPECT(segs[2].log == 3); @@ -726,7 +726,7 @@ static void test_sizes_to_segments(void) static void test_sizes_to_segments_empty(void) { - int seglen = 0; + size_t seglen = 0; struct segment *segs = sizes_to_segments(&seglen, NULL, 0); EXPECT(seglen == 0); reftable_free(segs); @@ -735,8 +735,7 @@ static void test_sizes_to_segments_empty(void) static void test_sizes_to_segments_all_equal(void) { uint64_t sizes[] = { 5, 5 }; - - int seglen = 0; + size_t seglen = 0; struct segment *segs = sizes_to_segments(&seglen, sizes, ARRAY_SIZE(sizes)); EXPECT(seglen == 1); -- cgit 1.2.3-korg From 47616c4399d958b8a9f40b1ad70071d2e3c56126 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:41 +0100 Subject: reftable/stack: use `size_t` to track stack slices during compaction We use `int`s to track reftable slices when compacting the reftable stack, which is considered to be a code smell in the Git project. Convert the code to use `size_t` instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 5da4ea8141..a86481a9a6 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -24,7 +24,8 @@ static int stack_try_add(struct reftable_stack *st, void *arg), void *arg); static int stack_write_compact(struct reftable_stack *st, - struct reftable_writer *wr, int first, int last, + struct reftable_writer *wr, + size_t first, size_t last, struct reftable_log_expiry_config *config); static int stack_check_addition(struct reftable_stack *st, const char *new_tab_name); @@ -820,7 +821,8 @@ uint64_t reftable_stack_next_update_index(struct reftable_stack *st) return 1; } -static int stack_compact_locked(struct reftable_stack *st, int first, int last, +static int stack_compact_locked(struct reftable_stack *st, + size_t first, size_t last, struct strbuf *temp_tab, struct reftable_log_expiry_config *config) { @@ -864,22 +866,21 @@ done: } static int stack_write_compact(struct reftable_stack *st, - struct reftable_writer *wr, int first, int last, + struct reftable_writer *wr, + size_t first, size_t last, struct reftable_log_expiry_config *config) { int subtabs_len = last - first + 1; struct reftable_table *subtabs = reftable_calloc( last - first + 1, sizeof(*subtabs)); struct reftable_merged_table *mt = NULL; - int err = 0; struct reftable_iterator it = { NULL }; struct reftable_ref_record ref = { NULL }; struct reftable_log_record log = { NULL }; - uint64_t entries = 0; + int err = 0; - int i = 0, j = 0; - for (i = first, j = 0; i <= last; i++) { + for (size_t i = first, j = 0; i <= last; i++) { struct reftable_reader *t = st->readers[i]; reftable_table_from_reader(&subtabs[j++], t); st->stats.bytes += t->size; @@ -963,7 +964,8 @@ done: } /* < 0: error. 0 == OK, > 0 attempt failed; could retry. */ -static int stack_compact_range(struct reftable_stack *st, int first, int last, +static int stack_compact_range(struct reftable_stack *st, + size_t first, size_t last, struct reftable_log_expiry_config *expiry) { char **delete_on_success = NULL, **subtable_locks = NULL, **listp = NULL; @@ -972,12 +974,10 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, struct strbuf lock_file_name = STRBUF_INIT; struct strbuf ref_list_contents = STRBUF_INIT; struct strbuf new_table_path = STRBUF_INIT; + size_t i, j, compact_count; int err = 0; int have_lock = 0; int lock_file_fd = -1; - int compact_count; - int i = 0; - int j = 0; int is_empty_table = 0; if (first > last || (!expiry && first == last)) { @@ -1172,17 +1172,17 @@ done: int reftable_stack_compact_all(struct reftable_stack *st, struct reftable_log_expiry_config *config) { - return stack_compact_range(st, 0, st->merged->stack_len - 1, config); + return stack_compact_range(st, 0, st->merged->stack_len ? + st->merged->stack_len - 1 : 0, config); } -static int stack_compact_range_stats(struct reftable_stack *st, int first, - int last, +static int stack_compact_range_stats(struct reftable_stack *st, + size_t first, size_t last, struct reftable_log_expiry_config *config) { int err = stack_compact_range(st, first, last, config); - if (err > 0) { + if (err > 0) st->stats.failures++; - } return err; } -- cgit 1.2.3-korg From 81879123c32d06406de5bfaf68baf6029660ca2a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:46 +0100 Subject: reftable/stack: use `size_t` to track stack length While the stack length is already stored as `size_t`, we frequently use `int`s to refer to those stacks throughout the reftable library. Convert those cases to use `size_t` instead to make things consistent. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.c | 7 +++---- reftable/basics.h | 2 +- reftable/merged.c | 11 +++++------ reftable/merged_test.c | 14 ++++++-------- reftable/reftable-merged.h | 2 +- reftable/stack.c | 21 ++++++++++----------- 6 files changed, 26 insertions(+), 31 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index af9004cec2..0785aff941 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -64,12 +64,11 @@ void free_names(char **a) reftable_free(a); } -int names_length(char **names) +size_t names_length(char **names) { char **p = names; - for (; *p; p++) { - /* empty */ - } + while (*p) + p++; return p - names; } diff --git a/reftable/basics.h b/reftable/basics.h index 4c3ac963a3..91f3533efe 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -44,7 +44,7 @@ void parse_names(char *buf, int size, char ***namesp); int names_equal(char **a, char **b); /* returns the array size of a NULL-terminated array of strings. */ -int names_length(char **names); +size_t names_length(char **names); /* Allocation routines; they invoke the functions set through * reftable_set_alloc() */ diff --git a/reftable/merged.c b/reftable/merged.c index 2031fd51b4..e2c6253324 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -45,11 +45,10 @@ static int merged_iter_init(struct merged_iter *mi) static void merged_iter_close(void *p) { struct merged_iter *mi = p; - int i = 0; + merged_iter_pqueue_release(&mi->pq); - for (i = 0; i < mi->stack_len; i++) { + for (size_t i = 0; i < mi->stack_len; i++) reftable_iterator_destroy(&mi->stack[i]); - } reftable_free(mi->stack); strbuf_release(&mi->key); strbuf_release(&mi->entry_key); @@ -168,14 +167,14 @@ static void iterator_from_merged_iter(struct reftable_iterator *it, } int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, int n, + struct reftable_table *stack, size_t n, uint32_t hash_id) { struct reftable_merged_table *m = NULL; uint64_t last_max = 0; uint64_t first_min = 0; - int i = 0; - for (i = 0; i < n; i++) { + + for (size_t i = 0; i < n; i++) { uint64_t min = reftable_table_min_update_index(&stack[i]); uint64_t max = reftable_table_max_update_index(&stack[i]); diff --git a/reftable/merged_test.c b/reftable/merged_test.c index e233a9d581..442917cc83 100644 --- a/reftable/merged_test.c +++ b/reftable/merged_test.c @@ -88,18 +88,17 @@ static struct reftable_merged_table * merged_table_from_records(struct reftable_ref_record **refs, struct reftable_block_source **source, struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, int n) + struct strbuf *buf, size_t n) { - int i = 0; struct reftable_merged_table *mt = NULL; - int err; struct reftable_table *tabs; + int err; REFTABLE_CALLOC_ARRAY(tabs, n); REFTABLE_CALLOC_ARRAY(*readers, n); REFTABLE_CALLOC_ARRAY(*source, n); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { write_test_table(&buf[i], refs[i], sizes[i]); block_source_from_strbuf(&(*source)[i], &buf[i]); @@ -263,18 +262,17 @@ static struct reftable_merged_table * merged_table_from_log_records(struct reftable_log_record **logs, struct reftable_block_source **source, struct reftable_reader ***readers, int *sizes, - struct strbuf *buf, int n) + struct strbuf *buf, size_t n) { - int i = 0; struct reftable_merged_table *mt = NULL; - int err; struct reftable_table *tabs; + int err; REFTABLE_CALLOC_ARRAY(tabs, n); REFTABLE_CALLOC_ARRAY(*readers, n); REFTABLE_CALLOC_ARRAY(*source, n); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { write_test_log_table(&buf[i], logs[i], sizes[i], i + 1); block_source_from_strbuf(&(*source)[i], &buf[i]); diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h index 1a6d16915a..c91a2d83a2 100644 --- a/reftable/reftable-merged.h +++ b/reftable/reftable-merged.h @@ -33,7 +33,7 @@ struct reftable_table; the stack array. */ int reftable_new_merged_table(struct reftable_merged_table **dest, - struct reftable_table *stack, int n, + struct reftable_table *stack, size_t n, uint32_t hash_id); /* returns an iterator positioned just before 'name' */ diff --git a/reftable/stack.c b/reftable/stack.c index a86481a9a6..bb684a3dc1 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -202,18 +202,18 @@ static struct reftable_reader **stack_copy_readers(struct reftable_stack *st, static int reftable_stack_reload_once(struct reftable_stack *st, char **names, int reuse_open) { - int cur_len = !st->merged ? 0 : st->merged->stack_len; + size_t cur_len = !st->merged ? 0 : st->merged->stack_len; struct reftable_reader **cur = stack_copy_readers(st, cur_len); - int err = 0; - int names_len = names_length(names); + size_t names_len = names_length(names); struct reftable_reader **new_readers = reftable_calloc(names_len, sizeof(*new_readers)); struct reftable_table *new_tables = reftable_calloc(names_len, sizeof(*new_tables)); - int new_readers_len = 0; + size_t new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; struct strbuf table_path = STRBUF_INIT; - int i; + int err = 0; + size_t i; while (*names) { struct reftable_reader *rd = NULL; @@ -221,11 +221,10 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, /* this is linear; we assume compaction keeps the number of tables under control so this is not quadratic. */ - int j = 0; - for (j = 0; reuse_open && j < cur_len; j++) { - if (cur[j] && 0 == strcmp(cur[j]->name, name)) { - rd = cur[j]; - cur[j] = NULL; + for (i = 0; reuse_open && i < cur_len; i++) { + if (cur[i] && 0 == strcmp(cur[i]->name, name)) { + rd = cur[i]; + cur[i] = NULL; break; } } @@ -870,7 +869,7 @@ static int stack_write_compact(struct reftable_stack *st, size_t first, size_t last, struct reftable_log_expiry_config *config) { - int subtabs_len = last - first + 1; + size_t subtabs_len = last - first + 1; struct reftable_table *subtabs = reftable_calloc( last - first + 1, sizeof(*subtabs)); struct reftable_merged_table *mt = NULL; -- cgit 1.2.3-korg From 59f302ca5abf3e8ec4f14f098b26adf786017fad Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:50 +0100 Subject: reftable/merged: refactor seeking of records The code to seek reftable records in the merged table code is quite hard to read and does not conform to our coding style in multiple ways: - We have multiple exit paths where we release resources even though that is not really necessary. - We use a scoped error variable `e` which is hard to reason about. This variable is not required at all. - We allocate memory in the variable declarations, which is easy to miss. Refactor the function so that it becomes more maintainable in the future. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 54 +++++++++++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index e2c6253324..0abcda26e8 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -238,50 +238,38 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, struct reftable_iterator *it, struct reftable_record *rec) { - struct reftable_iterator *iters = reftable_calloc( - mt->stack_len, sizeof(*iters)); struct merged_iter merged = { - .stack = iters, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, .key = STRBUF_INIT, .entry_key = STRBUF_INIT, }; - int n = 0; - int err = 0; - int i = 0; - for (i = 0; i < mt->stack_len && err == 0; i++) { - int e = reftable_table_seek_record(&mt->stack[i], &iters[n], - rec); - if (e < 0) { - err = e; - } - if (e == 0) { - n++; - } - } - if (err < 0) { - int i = 0; - for (i = 0; i < n; i++) { - reftable_iterator_destroy(&iters[i]); - } - reftable_free(iters); - return err; + struct merged_iter *p; + int err; + + REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len); + for (size_t i = 0; i < mt->stack_len; i++) { + err = reftable_table_seek_record(&mt->stack[i], + &merged.stack[merged.stack_len], rec); + if (err < 0) + goto out; + if (!err) + merged.stack_len++; } - merged.stack_len = n; err = merged_iter_init(&merged); - if (err < 0) { + if (err < 0) + goto out; + + p = reftable_malloc(sizeof(struct merged_iter)); + *p = merged; + iterator_from_merged_iter(it, p); + +out: + if (err < 0) merged_iter_close(&merged); - return err; - } else { - struct merged_iter *p = - reftable_malloc(sizeof(struct merged_iter)); - *p = merged; - iterator_from_merged_iter(it, p); - } - return 0; + return err; } int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, -- cgit 1.2.3-korg From 62d3c8e8c8a3dc3113cead8d9dd36f7e59054670 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:55 +0100 Subject: reftable/merged: refactor initialization of iterators Refactor the initialization of the merged iterator to fit our code style better. This refactoring prepares the code for a refactoring of how records are being initialized. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 0abcda26e8..0e60e2a39b 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -19,24 +19,23 @@ https://developers.google.com/open-source/licenses/bsd static int merged_iter_init(struct merged_iter *mi) { - int i = 0; - for (i = 0; i < mi->stack_len; i++) { - struct reftable_record rec = reftable_new_record(mi->typ); - int err = iterator_next(&mi->stack[i], &rec); - if (err < 0) { + for (size_t i = 0; i < mi->stack_len; i++) { + struct pq_entry e = { + .rec = reftable_new_record(mi->typ), + .index = i, + }; + int err; + + err = iterator_next(&mi->stack[i], &e.rec); + if (err < 0) return err; - } - if (err > 0) { reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&rec); - } else { - struct pq_entry e = { - .rec = rec, - .index = i, - }; - merged_iter_pqueue_add(&mi->pq, &e); + reftable_record_release(&e.rec); + continue; } + + merged_iter_pqueue_add(&mi->pq, &e); } return 0; -- cgit 1.2.3-korg From 3ddef475d008b8a705a53e6bb1405bb773ffdc50 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 6 Feb 2024 07:35:59 +0100 Subject: reftable/record: improve semantics when initializing records According to our usual coding style, the `reftable_new_record()` function would indicate that it is allocating a new record. This is not the case though as the function merely initializes records without allocating any memory. Replace `reftable_new_record()` with a new `reftable_record_init()` function that takes a record pointer as input and initializes it accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 18 +++++++++--------- reftable/merged.c | 8 +++++--- reftable/reader.c | 4 ++-- reftable/record.c | 43 ++++++++++--------------------------------- reftable/record.h | 10 +++++----- reftable/record_test.c | 4 ++-- 6 files changed, 33 insertions(+), 54 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 838759823a..ce8a7d24f3 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -382,23 +382,23 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, .key = *want, .r = br, }; - struct reftable_record rec = reftable_new_record(block_reader_type(br)); - int err = 0; struct block_iter next = BLOCK_ITER_INIT; + struct reftable_record rec; + int err = 0, i; - int i = binsearch(br->restart_count, &restart_key_less, &args); if (args.error) { err = REFTABLE_FORMAT_ERROR; goto done; } - it->br = br; - if (i > 0) { - i--; - it->next_off = block_reader_restart_offset(br, i); - } else { + i = binsearch(br->restart_count, &restart_key_less, &args); + if (i > 0) + it->next_off = block_reader_restart_offset(br, i - 1); + else it->next_off = br->header_off + 4; - } + it->br = br; + + reftable_record_init(&rec, block_reader_type(br)); /* We're looking for the last entry less/equal than the wanted key, so we have to go one entry too far and then back up. diff --git a/reftable/merged.c b/reftable/merged.c index 0e60e2a39b..a0f222e07b 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -21,11 +21,11 @@ static int merged_iter_init(struct merged_iter *mi) { for (size_t i = 0; i < mi->stack_len; i++) { struct pq_entry e = { - .rec = reftable_new_record(mi->typ), .index = i, }; int err; + reftable_record_init(&e.rec, mi->typ); err = iterator_next(&mi->stack[i], &e.rec); if (err < 0) return err; @@ -57,10 +57,12 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, size_t idx) { struct pq_entry e = { - .rec = reftable_new_record(mi->typ), .index = idx, }; - int err = iterator_next(&mi->stack[idx], &e.rec); + int err; + + reftable_record_init(&e.rec, mi->typ); + err = iterator_next(&mi->stack[idx], &e.rec); if (err < 0) return err; diff --git a/reftable/reader.c b/reftable/reader.c index 3e0de5e8ad..5e6c8f30a1 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -444,13 +444,13 @@ static int reader_start(struct reftable_reader *r, struct table_iter *ti, static int reader_seek_linear(struct table_iter *ti, struct reftable_record *want) { - struct reftable_record rec = - reftable_new_record(reftable_record_type(want)); struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; struct table_iter next = TABLE_ITER_INIT; + struct reftable_record rec; int err = -1; + reftable_record_init(&rec, reftable_record_type(want)); reftable_record_key(want, &want_key); while (1) { diff --git a/reftable/record.c b/reftable/record.c index 2f3cd6b62c..26c5e43f9b 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -1259,45 +1259,22 @@ reftable_record_vtable(struct reftable_record *rec) abort(); } -struct reftable_record reftable_new_record(uint8_t typ) +void reftable_record_init(struct reftable_record *rec, uint8_t typ) { - struct reftable_record clean = { - .type = typ, - }; + memset(rec, 0, sizeof(*rec)); + rec->type = typ; - /* the following is involved, but the naive solution (just return - * `clean` as is, except for BLOCK_TYPE_INDEX), returns a garbage - * clean.u.obj.offsets pointer on Windows VS CI. Go figure. - */ switch (typ) { - case BLOCK_TYPE_OBJ: - { - struct reftable_obj_record obj = { 0 }; - clean.u.obj = obj; - break; - } - case BLOCK_TYPE_INDEX: - { - struct reftable_index_record idx = { - .last_key = STRBUF_INIT, - }; - clean.u.idx = idx; - break; - } case BLOCK_TYPE_REF: - { - struct reftable_ref_record ref = { 0 }; - clean.u.ref = ref; - break; - } case BLOCK_TYPE_LOG: - { - struct reftable_log_record log = { 0 }; - clean.u.log = log; - break; - } + case BLOCK_TYPE_OBJ: + return; + case BLOCK_TYPE_INDEX: + strbuf_init(&rec->u.idx.last_key, 0); + return; + default: + BUG("unhandled record type"); } - return clean; } void reftable_record_print(struct reftable_record *rec, int hash_size) diff --git a/reftable/record.h b/reftable/record.h index fd80cd451d..e64ed30c80 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -69,9 +69,6 @@ struct reftable_record_vtable { /* returns true for recognized block types. Block start with the block type. */ int reftable_is_block_type(uint8_t typ); -/* return an initialized record for the given type */ -struct reftable_record reftable_new_record(uint8_t typ); - /* Encode `key` into `dest`. Sets `is_restart` to indicate a restart. Returns * number of bytes written. */ int reftable_encode_key(int *is_restart, struct string_view dest, @@ -100,8 +97,8 @@ struct reftable_obj_record { /* record is a generic wrapper for different types of records. It is normally * created on the stack, or embedded within another struct. If the type is * known, a fresh instance can be initialized explicitly. Otherwise, use - * reftable_new_record() to initialize generically (as the index_record is not - * valid as 0-initialized structure) + * `reftable_record_init()` to initialize generically (as the index_record is + * not valid as 0-initialized structure) */ struct reftable_record { uint8_t type; @@ -113,6 +110,9 @@ struct reftable_record { } u; }; +/* Initialize the reftable record for the given type */ +void reftable_record_init(struct reftable_record *rec, uint8_t typ); + /* see struct record_vtable */ int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); diff --git a/reftable/record_test.c b/reftable/record_test.c index 999366ad46..a86cff5526 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -16,11 +16,11 @@ static void test_copy(struct reftable_record *rec) { - struct reftable_record copy = { 0 }; + struct reftable_record copy; uint8_t typ; typ = reftable_record_type(rec); - copy = reftable_new_record(typ); + reftable_record_init(©, typ); reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); /* do it twice to catch memory leaks */ reftable_record_copy_from(©, rec, GIT_SHA1_RAWSZ); -- cgit 1.2.3-korg