aboutsummaryrefslogtreecommitdiffstats
path: root/reftable
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-02-26 18:10:24 -0800
committerJunio C Hamano <gitster@pobox.com>2024-02-26 18:10:24 -0800
commit9f67cbd0a7725857d219da6720fc9a5acfda2960 (patch)
treef7566b4db8e2f5d362d3464965e4a324d8d8516f /reftable
parent274400998b0a2da64d20e9fa95b288bf1403459c (diff)
parentc68ca7abd30b22404ce59d5133566729c07ffe8f (diff)
downloadgit-9f67cbd0a7725857d219da6720fc9a5acfda2960.tar.gz
Merge branch 'ps/reftable-iteration-perf'
The code to iterate over refs with the reftable backend has seen some optimization. * ps/reftable-iteration-perf: reftable/reader: add comments to `table_iter_next()` reftable/record: don't try to reallocate ref record name reftable/block: swap buffers instead of copying reftable/pq: allocation-less comparison of entry keys reftable/merged: skip comparison for records of the same subiter reftable/merged: allocation-less dropping of shadowed records reftable/record: introduce function to compare records by key
Diffstat (limited to 'reftable')
-rw-r--r--reftable/block.c3
-rw-r--r--reftable/merged.c19
-rw-r--r--reftable/merged.h2
-rw-r--r--reftable/pq.c13
-rw-r--r--reftable/reader.c26
-rw-r--r--reftable/record.c67
-rw-r--r--reftable/record.h7
7 files changed, 100 insertions, 37 deletions
diff --git a/reftable/block.c b/reftable/block.c
index ce8a7d24f3..72eb73b380 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -339,8 +339,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
return -1;
string_view_consume(&in, n);
- strbuf_reset(&it->last_key);
- strbuf_addbuf(&it->last_key, &it->key);
+ strbuf_swap(&it->last_key, &it->key);
it->next_off += start.len - in.len;
return 0;
}
diff --git a/reftable/merged.c b/reftable/merged.c
index a0f222e07b..1aa6cd31b7 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -49,8 +49,6 @@ static void merged_iter_close(void *p)
for (size_t i = 0; i < mi->stack_len; i++)
reftable_iterator_destroy(&mi->stack[i]);
reftable_free(mi->stack);
- strbuf_release(&mi->key);
- strbuf_release(&mi->entry_key);
}
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@@ -105,14 +103,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
such a deployment, the loop below must be changed to collect all
entries for the same key, and return new the newest one.
*/
- reftable_record_key(&entry.rec, &mi->entry_key);
while (!merged_iter_pqueue_is_empty(mi->pq)) {
struct pq_entry top = merged_iter_pqueue_top(mi->pq);
- int cmp = 0;
-
- reftable_record_key(&top.rec, &mi->key);
+ int cmp;
+
+ /*
+ * When the next entry comes from the same queue as the current
+ * entry then it must by definition be larger. This avoids a
+ * comparison in the most common case.
+ */
+ if (top.index == entry.index)
+ break;
- cmp = strbuf_cmp(&mi->key, &mi->entry_key);
+ cmp = reftable_record_cmp(&top.rec, &entry.rec);
if (cmp > 0)
break;
@@ -243,8 +246,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
.typ = reftable_record_type(rec),
.hash_id = mt->hash_id,
.suppress_deletions = mt->suppress_deletions,
- .key = STRBUF_INIT,
- .entry_key = STRBUF_INIT,
};
struct merged_iter *p;
int err;
diff --git a/reftable/merged.h b/reftable/merged.h
index d5b39dfe7f..7d9f95d27e 100644
--- a/reftable/merged.h
+++ b/reftable/merged.h
@@ -31,8 +31,6 @@ struct merged_iter {
uint8_t typ;
int suppress_deletions;
struct merged_iter_pqueue pq;
- struct strbuf key;
- struct strbuf entry_key;
};
void merged_table_release(struct reftable_merged_table *mt);
diff --git a/reftable/pq.c b/reftable/pq.c
index 2461daf5ff..e0ccce2b97 100644
--- a/reftable/pq.c
+++ b/reftable/pq.c
@@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd
int pq_less(struct pq_entry *a, struct pq_entry *b)
{
- struct strbuf ak = STRBUF_INIT;
- struct strbuf bk = STRBUF_INIT;
- int cmp = 0;
- reftable_record_key(&a->rec, &ak);
- reftable_record_key(&b->rec, &bk);
-
- cmp = strbuf_cmp(&ak, &bk);
-
- strbuf_release(&ak);
- strbuf_release(&bk);
-
+ int cmp = reftable_record_cmp(&a->rec, &b->rec);
if (cmp == 0)
return a->index > b->index;
-
return cmp < 0;
}
diff --git a/reftable/reader.c b/reftable/reader.c
index 2663b03938..b113daab77 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
while (1) {
struct table_iter next = TABLE_ITER_INIT;
- int err = 0;
- if (ti->is_finished) {
+ int err;
+
+ if (ti->is_finished)
return 1;
- }
+ /*
+ * Check whether the current block still has more records. If
+ * so, return it. If the iterator returns positive then the
+ * current block has been exhausted.
+ */
err = table_iter_next_in_block(ti, rec);
- if (err <= 0) {
+ if (err <= 0)
return err;
- }
+ /*
+ * Otherwise, we need to continue to the next block in the
+ * table and retry. If there are no more blocks then the
+ * iterator is drained.
+ */
err = table_iter_next_block(&next, ti);
- if (err != 0) {
- ti->is_finished = 1;
- }
table_iter_block_done(ti);
- if (err != 0) {
+ if (err) {
+ ti->is_finished = 1;
return err;
}
+
table_iter_copy_from(ti, &next);
block_iter_close(&next.bi);
}
diff --git a/reftable/record.c b/reftable/record.c
index 26c5e43f9b..d6bb42e887 100644
--- a/reftable/record.c
+++ b/reftable/record.c
@@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
assert(hash_size > 0);
- r->refname = reftable_realloc(r->refname, key.len + 1);
+ r->refname = reftable_malloc(key.len + 1);
memcpy(r->refname, key.buf, key.len);
- r->update_index = update_index;
r->refname[key.len] = 0;
+
+ r->update_index = update_index;
r->value_type = val_type;
switch (val_type) {
case REFTABLE_REF_VAL1:
@@ -430,7 +431,6 @@ static int reftable_ref_record_is_deletion_void(const void *p)
(const struct reftable_ref_record *)p);
}
-
static int reftable_ref_record_equal_void(const void *a,
const void *b, int hash_size)
{
@@ -439,6 +439,13 @@ static int reftable_ref_record_equal_void(const void *a,
return reftable_ref_record_equal(ra, rb, hash_size);
}
+static int reftable_ref_record_cmp_void(const void *_a, const void *_b)
+{
+ const struct reftable_ref_record *a = _a;
+ const struct reftable_ref_record *b = _b;
+ return strcmp(a->refname, b->refname);
+}
+
static void reftable_ref_record_print_void(const void *rec,
int hash_size)
{
@@ -455,6 +462,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = {
.release = &reftable_ref_record_release_void,
.is_deletion = &reftable_ref_record_is_deletion_void,
.equal = &reftable_ref_record_equal_void,
+ .cmp = &reftable_ref_record_cmp_void,
.print = &reftable_ref_record_print_void,
};
@@ -627,6 +635,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash
return 1;
}
+static int reftable_obj_record_cmp_void(const void *_a, const void *_b)
+{
+ const struct reftable_obj_record *a = _a;
+ const struct reftable_obj_record *b = _b;
+ int cmp;
+
+ cmp = memcmp(a->hash_prefix, b->hash_prefix,
+ a->hash_prefix_len > b->hash_prefix_len ?
+ a->hash_prefix_len : b->hash_prefix_len);
+ if (cmp)
+ return cmp;
+
+ /*
+ * When the prefix is the same then the object record that is longer is
+ * considered to be bigger.
+ */
+ return a->hash_prefix_len - b->hash_prefix_len;
+}
+
static struct reftable_record_vtable reftable_obj_record_vtable = {
.key = &reftable_obj_record_key,
.type = BLOCK_TYPE_OBJ,
@@ -637,6 +664,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = {
.release = &reftable_obj_record_release,
.is_deletion = &not_a_deletion,
.equal = &reftable_obj_record_equal_void,
+ .cmp = &reftable_obj_record_cmp_void,
.print = &reftable_obj_record_print,
};
@@ -955,6 +983,22 @@ static int reftable_log_record_equal_void(const void *a,
hash_size);
}
+static int reftable_log_record_cmp_void(const void *_a, const void *_b)
+{
+ const struct reftable_log_record *a = _a;
+ const struct reftable_log_record *b = _b;
+ int cmp = strcmp(a->refname, b->refname);
+ if (cmp)
+ return cmp;
+
+ /*
+ * Note that the comparison here is reversed. This is because the
+ * update index is reversed when comparing keys. For reference, see how
+ * we handle this in reftable_log_record_key()`.
+ */
+ return b->update_index - a->update_index;
+}
+
int reftable_log_record_equal(const struct reftable_log_record *a,
const struct reftable_log_record *b, int hash_size)
{
@@ -1004,6 +1048,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = {
.release = &reftable_log_record_release_void,
.is_deletion = &reftable_log_record_is_deletion_void,
.equal = &reftable_log_record_equal_void,
+ .cmp = &reftable_log_record_cmp_void,
.print = &reftable_log_record_print_void,
};
@@ -1079,6 +1124,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si
return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key);
}
+static int reftable_index_record_cmp(const void *_a, const void *_b)
+{
+ const struct reftable_index_record *a = _a;
+ const struct reftable_index_record *b = _b;
+ return strbuf_cmp(&a->last_key, &b->last_key);
+}
+
static void reftable_index_record_print(const void *rec, int hash_size)
{
const struct reftable_index_record *idx = rec;
@@ -1096,6 +1148,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = {
.release = &reftable_index_record_release,
.is_deletion = &not_a_deletion,
.equal = &reftable_index_record_equal,
+ .cmp = &reftable_index_record_cmp,
.print = &reftable_index_record_print,
};
@@ -1149,6 +1202,14 @@ int reftable_record_is_deletion(struct reftable_record *rec)
reftable_record_data(rec));
}
+int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b)
+{
+ if (a->type != b->type)
+ BUG("cannot compare reftable records of different type");
+ return reftable_record_vtable(a)->cmp(
+ reftable_record_data(a), reftable_record_data(b));
+}
+
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size)
{
if (a->type != b->type)
diff --git a/reftable/record.h b/reftable/record.h
index e64ed30c80..a05e2be179 100644
--- a/reftable/record.h
+++ b/reftable/record.h
@@ -62,6 +62,12 @@ struct reftable_record_vtable {
/* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */
int (*equal)(const void *a, const void *b, int hash_size);
+ /*
+ * Compare keys of two records with each other. The records must have
+ * the same type.
+ */
+ int (*cmp)(const void *a, const void *b);
+
/* Print on stdout, for debugging. */
void (*print)(const void *rec, int hash_size);
};
@@ -114,6 +120,7 @@ struct reftable_record {
void reftable_record_init(struct reftable_record *rec, uint8_t typ);
/* see struct record_vtable */
+int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
void reftable_record_print(struct reftable_record *rec, int hash_size);
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);