aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTaylor Blau <me@ttaylorr.com>2024-04-29 16:44:14 -0400
committerJunio C Hamano <gitster@pobox.com>2024-04-30 13:00:27 -0700
commitc7188a0e2c51cbf7a24e77dc738e202a7c132b7b (patch)
tree447fca2e860e91b9bf5c50cefb2ce41cbc159942
parent6b946a7496eae091d867078c731829c7d8de303c (diff)
downloadgit-c7188a0e2c51cbf7a24e77dc738e202a7c132b7b.tar.gz
pack-bitmap.c: use pseudo-merges during traversal
Notice: this object is not reachable from any branch.
Now that all of the groundwork has been laid to support reading and using pseudo-merges, make use of that work in this commit by teaching the pack-bitmap machinery to use pseudo-merge(s) when available during traversal. The basic operation is as follows: - When enumerating objects on either side of a reachability query, first see if any subset of the roots satisfies some pseudo-merge bitmap. If it does, apply that pseudo-merge bitmap. - If any pseudo-merge bitmap(s) were applied in the previous step, OR them into the result[^1]. Then repeat the process over all pseudo-merge bitmaps (we'll refer to this as "cascading" pseudo-merges). Once this is done, OR in the resulting bitmap. - If there is no fill-in traversal to be done, return the bitmap for that side of the reachability query. If there is fill-in traversal, then for each commit we encounter via show_commit(), check to see if any unsatisfied pseudo-merges containing that commit as one of its parents has been made satisfied by the presence of that commit. If so, OR in the object set from that pseudo-merge bitmap, and then cascade. If not, continue traversal. A similar implementation is present in the boundary-based bitmap traversal routines. [^1]: Importantly, we cannot OR in the entire set of roots along with the objects reachable from whatever pseudo-merge bitmaps were satisfied. This may leave some dangling bits corresponding to any unsatisfied root(s) getting OR'd into the resulting bitmap, tricking other parts of the traversal into thinking we already have a reachability closure over those commit(s) when we do not. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Notice: this object is not reachable from any branch.
-rw-r--r--pack-bitmap.c112
-rwxr-xr-xt/t5333-pseudo-merge-bitmaps.sh325
2 files changed, 436 insertions, 1 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c
index c13074673a..e61058dada 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -114,6 +114,9 @@ struct bitmap_index {
unsigned int version;
};
+static int pseudo_merges_satisfied_nr;
+static int pseudo_merges_cascades_nr;
+
static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
{
struct ewah_bitmap *parent;
@@ -1006,6 +1009,22 @@ static void show_commit(struct commit *commit UNUSED,
{
}
+static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct commit *commit,
+ uint32_t commit_pos)
+{
+ int ret;
+
+ ret = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges,
+ result, commit, commit_pos);
+
+ if (ret)
+ pseudo_merges_satisfied_nr += ret;
+
+ return ret;
+}
+
static int add_to_include_set(struct bitmap_index *bitmap_git,
struct include_data *data,
struct commit *commit,
@@ -1026,6 +1045,10 @@ static int add_to_include_set(struct bitmap_index *bitmap_git,
}
bitmap_set(data->base, bitmap_pos);
+ if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit,
+ bitmap_pos))
+ return 0;
+
return 1;
}
@@ -1151,6 +1174,20 @@ static void show_boundary_object(struct object *object UNUSED,
BUG("should not be called");
}
+static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct bitmap *roots)
+{
+ int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges,
+ result, roots);
+ if (ret) {
+ pseudo_merges_cascades_nr++;
+ pseudo_merges_satisfied_nr += ret;
+ }
+
+ return ret;
+}
+
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots)
@@ -1160,6 +1197,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
+ int existing_bitmaps = 0;
cb.bitmap_git = bitmap_git;
cb.base = bitmap_new();
@@ -1167,6 +1205,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
revs->ignore_missing_links = 1;
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* OR in any existing reachability bitmaps among `roots` into
* `cb.base`.
@@ -1178,8 +1235,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
continue;
if (add_commit_to_bitmap(bitmap_git, &cb.base,
- (struct commit *)object))
+ (struct commit *)object)) {
+ existing_bitmaps = 1;
continue;
+ }
any_missing = 1;
}
@@ -1187,6 +1246,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
if (!any_missing)
goto cleanup;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, cb.base, NULL);
+
tmp_blobs = revs->blob_objects;
tmp_trees = revs->tree_objects;
tmp_tags = revs->blob_objects;
@@ -1242,6 +1304,13 @@ cleanup:
return cb.base;
}
+static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
+{
+ uint32_t i;
+ for (i = 0; i < bitmap_git->pseudo_merges.nr; i++)
+ bitmap_git->pseudo_merges.v[i].satisfied = 0;
+}
+
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@@ -1249,9 +1318,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
{
struct bitmap *base = NULL;
int needs_walk = 0;
+ unsigned existing_bitmaps = 0;
struct object_list *not_mapped = NULL;
+ unsatisfy_all_pseudo_merges(bitmap_git);
+
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ base = bitmap_new();
+ if (!cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* Go through all the roots for the walk. The ones that have bitmaps
* on the bitmap index will be `or`ed together to form an initial
@@ -1262,11 +1354,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
*/
while (roots) {
struct object *object = roots->item;
+
roots = roots->next;
+ if (base) {
+ int pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos > 0 && bitmap_get(base, pos)) {
+ object->flags |= SEEN;
+ continue;
+ }
+ }
+
if (object->type == OBJ_COMMIT &&
add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
object->flags |= SEEN;
+ existing_bitmaps = 1;
continue;
}
@@ -1282,6 +1384,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
roots = not_mapped;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, base, NULL);
+
/*
* Let's iterate through all the roots that don't have bitmaps to
* check if we can determine them to be reachable from the existing
@@ -1866,6 +1971,11 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ pseudo_merges_satisfied_nr);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ pseudo_merges_cascades_nr);
+
return bitmap_git;
cleanup:
diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh
new file mode 100755
index 0000000000..909c17e301
--- /dev/null
+++ b/t/t5333-pseudo-merge-bitmaps.sh
@@ -0,0 +1,325 @@
+#!/bin/sh
+
+test_description='pseudo-merge bitmaps'
+
+GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
+
+. ./test-lib.sh
+
+test_pseudo_merges () {
+ test-tool bitmap dump-pseudo-merges
+}
+
+test_pseudo_merge_commits () {
+ test-tool bitmap dump-pseudo-merge-commits "$1"
+}
+
+test_pseudo_merges_satisfied () {
+ test_trace2_data bitmap pseudo_merges_satisfied "$1"
+}
+
+test_pseudo_merges_cascades () {
+ test_trace2_data bitmap pseudo_merges_cascades "$1"
+}
+
+tag_everything () {
+ git rev-list --all --no-object-names >in &&
+ perl -lne '
+ print "create refs/tags/" . $. . " " . $1 if /([0-9a-f]+)/
+ ' <in | git update-ref --stdin
+}
+
+test_expect_success 'setup' '
+ test_commit_bulk 512 &&
+ tag_everything
+'
+
+test_expect_success 'bitmap traversal without pseudo-merges' '
+ git repack -adb &&
+
+ git rev-list --count --all --objects >expect &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt \
+ git rev-list --count --all --objects --use-bitmap-index >actual &&
+
+ test_pseudo_merges_satisfied 0 <trace2.txt &&
+ test_pseudo_merges_cascades 0 <trace2.txt &&
+ test_pseudo_merges >merges &&
+ test_must_be_empty merges &&
+ test_cmp expect actual
+'
+
+test_expect_success 'pseudo-merges accurately represent their objects' '
+ test_config bitmapPseudoMerge.test.pattern "refs/tags/" &&
+ test_config bitmapPseudoMerge.test.maxMerges 8 &&
+ test_config bitmapPseudoMerge.test.stableThreshold never &&
+
+ git repack -adb &&
+
+ test_pseudo_merges >merges &&
+ test_line_count = 8 merges &&
+
+ for i in $(test_seq 0 $(($(wc -l <merges)-1)))
+ do
+ test-tool bitmap dump-pseudo-merge-commits $i >commits &&
+
+ git rev-list --objects --no-object-names --stdin <commits >expect.raw &&
+ test-tool bitmap dump-pseudo-merge-objects $i >actual.raw &&
+
+ sort -u <expect.raw >expect &&
+ sort -u <actual.raw >actual &&
+
+ test_cmp expect actual || return 1
+ done
+'
+
+test_expect_success 'bitmap traversal with pseudo-merges' '
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt \
+ git rev-list --count --all --objects --use-bitmap-index >actual &&
+ git rev-list --count --all --objects >expect &&
+
+ test_pseudo_merges_satisfied 8 <trace2.txt &&
+ test_pseudo_merges_cascades 1 <trace2.txt &&
+ test_cmp expect actual
+'
+
+test_expect_success 'stale bitmap traversal with pseudo-merges' '
+ test_commit other &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt \
+ git rev-list --count --all --objects --use-bitmap-index >actual &&
+ git rev-list --count --all --objects >expect &&
+
+ test_pseudo_merges_satisfied 8 <trace2.txt &&
+ test_pseudo_merges_cascades 1 <trace2.txt &&
+ test_cmp expect actual
+'
+
+test_expect_success 'bitmapPseudoMerge.sampleRate adjusts commit selection rate' '
+ test_config bitmapPseudoMerge.test.pattern "refs/tags/" &&
+ test_config bitmapPseudoMerge.test.maxMerges 8 &&
+ test_config bitmapPseudoMerge.test.stableThreshold never &&
+
+ commits_nr=$(git rev-list --all --count) &&
+
+ for rate in 100 50 10
+ do
+ git -c bitmapPseudoMerge.test.sampleRate=$rate repack -adb &&
+
+ test_pseudo_merges >merges &&
+ for i in $(test_seq 0 $(($(wc -l <merges)-1)))
+ do
+ test_pseudo_merge_commits $i || return 1
+ done >commits &&
+
+ test-tool bitmap list-commits >bitmaps &&
+ bitmaps_nr="$(wc -l <bitmaps)" &&
+
+ perl -MPOSIX -e "print ceil((\$ARGV[0]/100)*(\$ARGV[1]-\$ARGV[2]))" \
+ "$rate" "$commits_nr" "$bitmaps_nr" >expect &&
+
+ test $(cat expect) -eq $(wc -l <commits) || return 1
+ done
+'
+
+test_expect_success 'bitmapPseudoMerge.threshold excludes newer commits' '
+ git init pseudo-merge-threshold &&
+ (
+ cd pseudo-merge-threshold &&
+
+ new="1672549200" && # 2023-01-01
+ old="1641013200" && # 2022-01-01
+
+ test_commit_bulk --message="new" --date "$new +0000" 128 &&
+ test_commit_bulk --message="old" --date "$old +0000" 128 &&
+ test_tick &&
+
+ tag_everything &&
+
+ git \
+ -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+ -c bitmapPseudoMerge.test.maxMerges=1 \
+ -c bitmapPseudoMerge.test.threshold=$(($new - 1)) \
+ -c bitmapPseudoMerge.test.stableThreshold=never \
+ repack -adb &&
+
+ test_pseudo_merges >merges &&
+ test_line_count = 1 merges &&
+
+ test_pseudo_merge_commits 0 >oids &&
+ git cat-file --batch <oids >commits &&
+
+ test $(wc -l <oids) = $(grep -c "^committer.*$old +0000$" commits)
+ )
+'
+
+test_expect_success 'bitmapPseudoMerge.stableThreshold creates stable groups' '
+ (
+ cd pseudo-merge-threshold &&
+
+ new="1672549200" && # 2023-01-01
+ mid="1654059600" && # 2022-06-01
+ old="1641013200" && # 2022-01-01
+
+ test_commit_bulk --message="mid" --date "$mid +0000" 128 &&
+ test_tick &&
+
+ git for-each-ref --format="delete %(refname)" refs/tags >in &&
+ git update-ref --stdin <in &&
+
+ tag_everything &&
+
+ git \
+ -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+ -c bitmapPseudoMerge.test.maxMerges=1 \
+ -c bitmapPseudoMerge.test.threshold=$(($new - 1)) \
+ -c bitmapPseudoMerge.test.stableThreshold=$(($mid - 1)) \
+ -c bitmapPseudoMerge.test.stableSize=10 \
+ repack -adb &&
+
+ test_pseudo_merges >merges &&
+ merges_nr="$(wc -l <merges)" &&
+
+ for i in $(test_seq $(($merges_nr - 1)))
+ do
+ test_pseudo_merge_commits 0 >oids &&
+ git cat-file --batch <oids >commits &&
+
+ expect="$(grep -c "^committer.*$old +0000$" commits)" &&
+ actual="$(wc -l <oids)" &&
+
+ test $expect = $actual || return 1
+ done &&
+
+ test_pseudo_merge_commits $(($merges_nr - 1)) >oids &&
+ git cat-file --batch <oids >commits &&
+ test $(wc -l <oids) = $(grep -c "^committer.*$mid +0000$" commits)
+ )
+'
+
+test_expect_success 'out of order thresholds are rejected' '
+ test_must_fail git \
+ -c bitmapPseudoMerge.test.pattern="refs/*" \
+ -c bitmapPseudoMerge.test.threshold=1.month.ago \
+ -c bitmapPseudoMerge.test.stableThreshold=1.week.ago \
+ repack -adb 2>err &&
+
+ cat >expect <<-EOF &&
+ fatal: pseudo-merge group ${SQ}test${SQ} has unstable threshold before stable one
+ EOF
+
+ test_cmp expect err
+'
+
+test_expect_success 'pseudo-merge pattern with capture groups' '
+ git init pseudo-merge-captures &&
+ (
+ cd pseudo-merge-captures &&
+
+ test_commit_bulk 128 &&
+ tag_everything &&
+
+ for r in $(test_seq 8)
+ do
+ test_commit_bulk 16 &&
+
+ git rev-list HEAD~16.. >in &&
+
+ perl -lne "print \"create refs/remotes/$r/tags/\$. \$_\"" <in |
+ git update-ref --stdin || return 1
+ done &&
+
+ git \
+ -c bitmapPseudoMerge.tags.pattern="refs/remotes/([0-9]+)/tags/" \
+ -c bitmapPseudoMerge.tags.maxMerges=1 \
+ repack -adb &&
+
+ git for-each-ref --format="%(objectname) %(refname)" >refs &&
+
+ test_pseudo_merges >merges &&
+ for m in $(test_seq 0 $(($(wc -l <merges) - 1)))
+ do
+ test_pseudo_merge_commits $m >oids &&
+ grep -f oids refs |
+ perl -lne "print \$1 if /refs\/remotes\/([0-9]+)/" |
+ sort -u || return 1
+ done >remotes &&
+
+ test $(wc -l <remotes) -eq $(sort -u <remotes | wc -l)
+ )
+'
+
+test_expect_success 'pseudo-merge overlap setup' '
+ git init pseudo-merge-overlap &&
+ (
+ cd pseudo-merge-overlap &&
+
+ test_commit_bulk 256 &&
+ tag_everything &&
+
+ git \
+ -c bitmapPseudoMerge.all.pattern="refs/" \
+ -c bitmapPseudoMerge.all.maxMerges=1 \
+ -c bitmapPseudoMerge.all.stableThreshold=never \
+ -c bitmapPseudoMerge.tags.pattern="refs/tags/" \
+ -c bitmapPseudoMerge.tags.maxMerges=1 \
+ -c bitmapPseudoMerge.tags.stableThreshold=never \
+ repack -adb
+ )
+'
+
+test_expect_success 'pseudo-merge overlap generates overlapping groups' '
+ (
+ cd pseudo-merge-overlap &&
+
+ test_pseudo_merges >merges &&
+ test_line_count = 2 merges &&
+
+ test_pseudo_merge_commits 0 >commits-0.raw &&
+ test_pseudo_merge_commits 1 >commits-1.raw &&
+
+ sort commits-0.raw >commits-0 &&
+ sort commits-1.raw >commits-1 &&
+
+ comm -12 commits-0 commits-1 >overlap &&
+
+ test_line_count -gt 0 overlap
+ )
+'
+
+test_expect_success 'pseudo-merge overlap traversal' '
+ (
+ cd pseudo-merge-overlap &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt \
+ git rev-list --count --all --objects --use-bitmap-index >actual &&
+ git rev-list --count --all --objects >expect &&
+
+ test_pseudo_merges_satisfied 2 <trace2.txt &&
+ test_pseudo_merges_cascades 1 <trace2.txt &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'pseudo-merge overlap stale traversal' '
+ (
+ cd pseudo-merge-overlap &&
+
+ test_commit other &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt \
+ git rev-list --count --all --objects --use-bitmap-index >actual &&
+ git rev-list --count --all --objects >expect &&
+
+ test_pseudo_merges_satisfied 2 <trace2.txt &&
+ test_pseudo_merges_cascades 1 <trace2.txt &&
+ test_cmp expect actual
+ )
+'
+
+test_done