From 5227c385667cadd3b34668329016755181fa98ea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:02 +0000 Subject: commit-reach: move walk methods from commit.c There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The method declarations in commit.h are not touched by this commit and will be moved in a following commit. Many consumers need to point to commit-reach.h and that would bloat this commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 commit-reach.c (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c new file mode 100644 index 0000000000..8ab6044414 --- /dev/null +++ b/commit-reach.c @@ -0,0 +1,360 @@ +#include "cache.h" +#include "prio-queue.h" +#include "commit.h" +#include "commit-reach.h" + +/* Remember to update object flag allocation in object.h */ +#define PARENT1 (1u<<16) +#define PARENT2 (1u<<17) +#define STALE (1u<<18) +#define RESULT (1u<<19) + +static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); + +static int queue_has_nonstale(struct prio_queue *queue) +{ + int i; + for (i = 0; i < queue->nr; i++) { + struct commit *commit = queue->array[i].data; + if (!(commit->object.flags & STALE)) + return 1; + } + return 0; +} + +/* all input commits in one and twos[] must have been parsed! */ +static struct commit_list *paint_down_to_common(struct commit *one, int n, + struct commit **twos, + int min_generation) +{ + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct commit_list *result = NULL; + int i; + uint32_t last_gen = GENERATION_NUMBER_INFINITY; + + one->object.flags |= PARENT1; + if (!n) { + commit_list_append(one, &result); + return result; + } + prio_queue_put(&queue, one); + + for (i = 0; i < n; i++) { + twos[i]->object.flags |= PARENT2; + prio_queue_put(&queue, twos[i]); + } + + while (queue_has_nonstale(&queue)) { + struct commit *commit = prio_queue_get(&queue); + struct commit_list *parents; + int flags; + + if (commit->generation > last_gen) + BUG("bad generation skip %8x > %8x at %s", + commit->generation, last_gen, + oid_to_hex(&commit->object.oid)); + last_gen = commit->generation; + + if (commit->generation < min_generation) + break; + + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); + if (flags == (PARENT1 | PARENT2)) { + if (!(commit->object.flags & RESULT)) { + commit->object.flags |= RESULT; + commit_list_insert_by_date(commit, &result); + } + /* Mark parents of a found merge stale */ + flags |= STALE; + } + parents = commit->parents; + while (parents) { + struct commit *p = parents->item; + parents = parents->next; + if ((p->object.flags & flags) == flags) + continue; + if (parse_commit(p)) + return NULL; + p->object.flags |= flags; + prio_queue_put(&queue, p); + } + } + + clear_prio_queue(&queue); + return result; +} + +static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos) +{ + struct commit_list *list = NULL; + struct commit_list *result = NULL; + int i; + + for (i = 0; i < n; i++) { + if (one == twos[i]) + /* + * We do not mark this even with RESULT so we do not + * have to clean it up. + */ + return commit_list_insert(one, &result); + } + + if (parse_commit(one)) + return NULL; + for (i = 0; i < n; i++) { + if (parse_commit(twos[i])) + return NULL; + } + + list = paint_down_to_common(one, n, twos, 0); + + while (list) { + struct commit *commit = pop_commit(&list); + if (!(commit->object.flags & STALE)) + commit_list_insert_by_date(commit, &result); + } + return result; +} + +struct commit_list *get_octopus_merge_bases(struct commit_list *in) +{ + struct commit_list *i, *j, *k, *ret = NULL; + + if (!in) + return ret; + + commit_list_insert(in->item, &ret); + + for (i = in->next; i; i = i->next) { + struct commit_list *new_commits = NULL, *end = NULL; + + for (j = ret; j; j = j->next) { + struct commit_list *bases; + bases = get_merge_bases(i->item, j->item); + if (!new_commits) + new_commits = bases; + else + end->next = bases; + for (k = bases; k; k = k->next) + end = k; + } + ret = new_commits; + } + return ret; +} + +static int remove_redundant(struct commit **array, int cnt) +{ + /* + * Some commit in the array may be an ancestor of + * another commit. Move such commit to the end of + * the array, and return the number of commits that + * are independent from each other. + */ + struct commit **work; + unsigned char *redundant; + int *filled_index; + int i, j, filled; + + work = xcalloc(cnt, sizeof(*work)); + redundant = xcalloc(cnt, 1); + ALLOC_ARRAY(filled_index, cnt - 1); + + for (i = 0; i < cnt; i++) + parse_commit(array[i]); + for (i = 0; i < cnt; i++) { + struct commit_list *common; + uint32_t min_generation = array[i]->generation; + + if (redundant[i]) + continue; + for (j = filled = 0; j < cnt; j++) { + if (i == j || redundant[j]) + continue; + filled_index[filled] = j; + work[filled++] = array[j]; + + if (array[j]->generation < min_generation) + min_generation = array[j]->generation; + } + common = paint_down_to_common(array[i], filled, work, + min_generation); + if (array[i]->object.flags & PARENT2) + redundant[i] = 1; + for (j = 0; j < filled; j++) + if (work[j]->object.flags & PARENT1) + redundant[filled_index[j]] = 1; + clear_commit_marks(array[i], all_flags); + clear_commit_marks_many(filled, work, all_flags); + free_commit_list(common); + } + + /* Now collect the result */ + COPY_ARRAY(work, array, cnt); + for (i = filled = 0; i < cnt; i++) + if (!redundant[i]) + array[filled++] = work[i]; + for (j = filled, i = 0; i < cnt; i++) + if (redundant[i]) + array[j++] = work[i]; + free(work); + free(redundant); + free(filled_index); + return filled; +} + +static struct commit_list *get_merge_bases_many_0(struct commit *one, + int n, + struct commit **twos, + int cleanup) +{ + struct commit_list *list; + struct commit **rslt; + struct commit_list *result; + int cnt, i; + + result = merge_bases_many(one, n, twos); + for (i = 0; i < n; i++) { + if (one == twos[i]) + return result; + } + if (!result || !result->next) { + if (cleanup) { + clear_commit_marks(one, all_flags); + clear_commit_marks_many(n, twos, all_flags); + } + return result; + } + + /* There are more than one */ + cnt = commit_list_count(result); + rslt = xcalloc(cnt, sizeof(*rslt)); + for (list = result, i = 0; list; list = list->next) + rslt[i++] = list->item; + free_commit_list(result); + + clear_commit_marks(one, all_flags); + clear_commit_marks_many(n, twos, all_flags); + + cnt = remove_redundant(rslt, cnt); + result = NULL; + for (i = 0; i < cnt; i++) + commit_list_insert_by_date(rslt[i], &result); + free(rslt); + return result; +} + +struct commit_list *get_merge_bases_many(struct commit *one, + int n, + struct commit **twos) +{ + return get_merge_bases_many_0(one, n, twos, 1); +} + +struct commit_list *get_merge_bases_many_dirty(struct commit *one, + int n, + struct commit **twos) +{ + return get_merge_bases_many_0(one, n, twos, 0); +} + +struct commit_list *get_merge_bases(struct commit *one, struct commit *two) +{ + return get_merge_bases_many_0(one, 1, &two, 1); +} + +/* + * Is "commit" a descendant of one of the elements on the "with_commit" list? + */ +int is_descendant_of(struct commit *commit, struct commit_list *with_commit) +{ + if (!with_commit) + return 1; + while (with_commit) { + struct commit *other; + + other = with_commit->item; + with_commit = with_commit->next; + if (in_merge_bases(other, commit)) + return 1; + } + return 0; +} + +/* + * Is "commit" an ancestor of one of the "references"? + */ +int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference) +{ + struct commit_list *bases; + int ret = 0, i; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; + + if (parse_commit(commit)) + return ret; + for (i = 0; i < nr_reference; i++) { + if (parse_commit(reference[i])) + return ret; + if (reference[i]->generation < min_generation) + min_generation = reference[i]->generation; + } + + if (commit->generation > min_generation) + return ret; + + bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); + if (commit->object.flags & PARENT2) + ret = 1; + clear_commit_marks(commit, all_flags); + clear_commit_marks_many(nr_reference, reference, all_flags); + free_commit_list(bases); + return ret; +} + +/* + * Is "commit" an ancestor of (i.e. reachable from) the "reference"? + */ +int in_merge_bases(struct commit *commit, struct commit *reference) +{ + return in_merge_bases_many(commit, 1, &reference); +} + +struct commit_list *reduce_heads(struct commit_list *heads) +{ + struct commit_list *p; + struct commit_list *result = NULL, **tail = &result; + struct commit **array; + int num_head, i; + + if (!heads) + return NULL; + + /* Uniquify */ + for (p = heads; p; p = p->next) + p->item->object.flags &= ~STALE; + for (p = heads, num_head = 0; p; p = p->next) { + if (p->item->object.flags & STALE) + continue; + p->item->object.flags |= STALE; + num_head++; + } + array = xcalloc(num_head, sizeof(*array)); + for (p = heads, i = 0; p; p = p->next) { + if (p->item->object.flags & STALE) { + array[i++] = p->item; + p->item->object.flags &= ~STALE; + } + } + num_head = remove_redundant(array, num_head); + for (i = 0; i < num_head; i++) + tail = &commit_list_insert(array[i], tail)->next; + free(array); + return result; +} + +void reduce_heads_replace(struct commit_list **heads) +{ + struct commit_list *result = reduce_heads(*heads); + free_commit_list(*heads); + *heads = result; +} -- cgit 1.2.3-korg From 1d614d41e5fc030bc2f2799a58791aa912f78378 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:06 +0000 Subject: commit-reach: move ref_newer from remote.c There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The ref_newer() method is used by 'git push -f' to check if a force-push is necessary. By making the method public, we make it possible to test the method directly without setting up an envieronment where a 'git push' call makes sense. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/remote.c | 1 + commit-reach.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- commit-reach.h | 2 ++ remote.c | 49 ------------------------------------------------- remote.h | 1 - 5 files changed, 57 insertions(+), 51 deletions(-) (limited to 'commit-reach.c') diff --git a/builtin/remote.c b/builtin/remote.c index c74ee88690..79b0326446 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -10,6 +10,7 @@ #include "refspec.h" #include "object-store.h" #include "argv-array.h" +#include "commit-reach.h" static const char * const builtin_remote_usage[] = { N_("git remote [-v | --verbose]"), diff --git a/commit-reach.c b/commit-reach.c index 8ab6044414..a6bc4781a6 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1,6 +1,10 @@ #include "cache.h" -#include "prio-queue.h" #include "commit.h" +#include "decorate.h" +#include "prio-queue.h" +#include "tree.h" +#include "revision.h" +#include "tag.h" #include "commit-reach.h" /* Remember to update object flag allocation in object.h */ @@ -358,3 +362,52 @@ void reduce_heads_replace(struct commit_list **heads) free_commit_list(*heads); *heads = result; } + +static void unmark_and_free(struct commit_list *list, unsigned int mark) +{ + while (list) { + struct commit *commit = pop_commit(&list); + commit->object.flags &= ~mark; + } +} + +int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) +{ + struct object *o; + struct commit *old_commit, *new_commit; + struct commit_list *list, *used; + int found = 0; + + /* + * Both new_commit and old_commit must be commit-ish and new_commit is descendant of + * old_commit. Otherwise we require --force. + */ + o = deref_tag(the_repository, parse_object(the_repository, old_oid), + NULL, 0); + if (!o || o->type != OBJ_COMMIT) + return 0; + old_commit = (struct commit *) o; + + o = deref_tag(the_repository, parse_object(the_repository, new_oid), + NULL, 0); + if (!o || o->type != OBJ_COMMIT) + return 0; + new_commit = (struct commit *) o; + + if (parse_commit(new_commit) < 0) + return 0; + + used = list = NULL; + commit_list_insert(new_commit, &list); + while (list) { + new_commit = pop_most_recent_commit(&list, TMP_MARK); + commit_list_insert(new_commit, &used); + if (new_commit == old_commit) { + found = 1; + break; + } + } + unmark_and_free(list, TMP_MARK); + unmark_and_free(used, TMP_MARK); + return found; +} diff --git a/commit-reach.h b/commit-reach.h index 1ea2696e40..f1cf9bfcd8 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -39,4 +39,6 @@ struct commit_list *reduce_heads(struct commit_list *heads); */ void reduce_heads_replace(struct commit_list **heads); +int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); + #endif diff --git a/remote.c b/remote.c index 8e99b9888a..f0c23bae48 100644 --- a/remote.c +++ b/remote.c @@ -1784,55 +1784,6 @@ int resolve_remote_symref(struct ref *ref, struct ref *list) return 1; } -static void unmark_and_free(struct commit_list *list, unsigned int mark) -{ - while (list) { - struct commit *commit = pop_commit(&list); - commit->object.flags &= ~mark; - } -} - -int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) -{ - struct object *o; - struct commit *old_commit, *new_commit; - struct commit_list *list, *used; - int found = 0; - - /* - * Both new_commit and old_commit must be commit-ish and new_commit is descendant of - * old_commit. Otherwise we require --force. - */ - o = deref_tag(the_repository, parse_object(the_repository, old_oid), - NULL, 0); - if (!o || o->type != OBJ_COMMIT) - return 0; - old_commit = (struct commit *) o; - - o = deref_tag(the_repository, parse_object(the_repository, new_oid), - NULL, 0); - if (!o || o->type != OBJ_COMMIT) - return 0; - new_commit = (struct commit *) o; - - if (parse_commit(new_commit) < 0) - return 0; - - used = list = NULL; - commit_list_insert(new_commit, &list); - while (list) { - new_commit = pop_most_recent_commit(&list, TMP_MARK); - commit_list_insert(new_commit, &used); - if (new_commit == old_commit) { - found = 1; - break; - } - } - unmark_and_free(list, TMP_MARK); - unmark_and_free(used, TMP_MARK); - return found; -} - /* * Lookup the upstream branch for the given branch and if present, optionally * compute the commit ahead/behind values for the pair. diff --git a/remote.h b/remote.h index 45ecc6cefa..56fb9cbb27 100644 --- a/remote.h +++ b/remote.h @@ -149,7 +149,6 @@ extern struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, const struct string_list *server_options); int resolve_remote_symref(struct ref *ref, struct ref *list); -int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); /* * Remove and free all but the first of any entries in the input list -- cgit 1.2.3-korg From 920f93ca1c005a81cfb88c87fca18de5e4bde8c5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:08 +0000 Subject: commit-reach: move commit_contains from ref-filter There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. All methods are direct moves, except we also make the commit_contains() method public so its consumers in ref-filter.c can still call it. We can also test this method in a test-tool in a later commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 20 +++++++- ref-filter.c | 145 +++------------------------------------------------------ 3 files changed, 147 insertions(+), 139 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index a6bc4781a6..01d796f011 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1,8 +1,10 @@ #include "cache.h" #include "commit.h" +#include "commit-graph.h" #include "decorate.h" #include "prio-queue.h" #include "tree.h" +#include "ref-filter.c" #include "revision.h" #include "tag.h" #include "commit-reach.h" @@ -411,3 +413,122 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) unmark_and_free(used, TMP_MARK); return found; } + +/* + * Mimicking the real stack, this stack lives on the heap, avoiding stack + * overflows. + * + * At each recursion step, the stack items points to the commits whose + * ancestors are to be inspected. + */ +struct contains_stack { + int nr, alloc; + struct contains_stack_entry { + struct commit *commit; + struct commit_list *parents; + } *contains_stack; +}; + +static int in_commit_list(const struct commit_list *want, struct commit *c) +{ + for (; want; want = want->next) + if (!oidcmp(&want->item->object.oid, &c->object.oid)) + return 1; + return 0; +} + +/* + * Test whether the candidate is contained in the list. + * Do not recurse to find out, though, but return -1 if inconclusive. + */ +static enum contains_result contains_test(struct commit *candidate, + const struct commit_list *want, + struct contains_cache *cache, + uint32_t cutoff) +{ + enum contains_result *cached = contains_cache_at(cache, candidate); + + /* If we already have the answer cached, return that. */ + if (*cached) + return *cached; + + /* or are we it? */ + if (in_commit_list(want, candidate)) { + *cached = CONTAINS_YES; + return CONTAINS_YES; + } + + /* Otherwise, we don't know; prepare to recurse */ + parse_commit_or_die(candidate); + + if (candidate->generation < cutoff) + return CONTAINS_NO; + + return CONTAINS_UNKNOWN; +} + +static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack) +{ + ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc); + contains_stack->contains_stack[contains_stack->nr].commit = candidate; + contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents; +} + +static enum contains_result contains_tag_algo(struct commit *candidate, + const struct commit_list *want, + struct contains_cache *cache) +{ + struct contains_stack contains_stack = { 0, 0, NULL }; + enum contains_result result; + uint32_t cutoff = GENERATION_NUMBER_INFINITY; + const struct commit_list *p; + + for (p = want; p; p = p->next) { + struct commit *c = p->item; + load_commit_graph_info(the_repository, c); + if (c->generation < cutoff) + cutoff = c->generation; + } + + result = contains_test(candidate, want, cache, cutoff); + if (result != CONTAINS_UNKNOWN) + return result; + + push_to_contains_stack(candidate, &contains_stack); + while (contains_stack.nr) { + struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; + struct commit *commit = entry->commit; + struct commit_list *parents = entry->parents; + + if (!parents) { + *contains_cache_at(cache, commit) = CONTAINS_NO; + contains_stack.nr--; + } + /* + * If we just popped the stack, parents->item has been marked, + * therefore contains_test will return a meaningful yes/no. + */ + else switch (contains_test(parents->item, want, cache, cutoff)) { + case CONTAINS_YES: + *contains_cache_at(cache, commit) = CONTAINS_YES; + contains_stack.nr--; + break; + case CONTAINS_NO: + entry->parents = parents->next; + break; + case CONTAINS_UNKNOWN: + push_to_contains_stack(parents->item, &contains_stack); + break; + } + } + free(contains_stack.contains_stack); + return contains_test(candidate, want, cache, cutoff); +} + +int commit_contains(struct ref_filter *filter, struct commit *commit, + struct commit_list *list, struct contains_cache *cache) +{ + if (filter->with_commit_tag_algo) + return contains_tag_algo(commit, list, cache) == CONTAINS_YES; + return is_descendant_of(commit, list); +} diff --git a/commit-reach.h b/commit-reach.h index f1cf9bfcd8..13dec25cee 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -1,8 +1,12 @@ #ifndef __COMMIT_REACH_H__ #define __COMMIT_REACH_H__ +#include "commit-slab.h" + struct commit; struct commit_list; +struct contains_cache; +struct ref_filter; struct commit_list *get_merge_bases_many(struct commit *one, int n, @@ -20,7 +24,6 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit); int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference); int in_merge_bases(struct commit *commit, struct commit *reference); - /* * Takes a list of commits and returns a new list where those * have been removed that can be reached from other commits in @@ -41,4 +44,19 @@ void reduce_heads_replace(struct commit_list **heads); int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); +/* + * Unknown has to be "0" here, because that's the default value for + * contains_cache slab entries that have not yet been assigned. + */ +enum contains_result { + CONTAINS_UNKNOWN = 0, + CONTAINS_NO, + CONTAINS_YES +}; + +define_commit_slab(contains_cache, enum contains_result); + +int commit_contains(struct ref_filter *filter, struct commit *commit, + struct commit_list *list, struct contains_cache *cache); + #endif diff --git a/ref-filter.c b/ref-filter.c index fca3ad040a..495e830fa5 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1624,144 +1624,6 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom, return 0; } -/* - * Unknown has to be "0" here, because that's the default value for - * contains_cache slab entries that have not yet been assigned. - */ -enum contains_result { - CONTAINS_UNKNOWN = 0, - CONTAINS_NO, - CONTAINS_YES -}; - -define_commit_slab(contains_cache, enum contains_result); - -struct ref_filter_cbdata { - struct ref_array *array; - struct ref_filter *filter; - struct contains_cache contains_cache; - struct contains_cache no_contains_cache; -}; - -/* - * Mimicking the real stack, this stack lives on the heap, avoiding stack - * overflows. - * - * At each recursion step, the stack items points to the commits whose - * ancestors are to be inspected. - */ -struct contains_stack { - int nr, alloc; - struct contains_stack_entry { - struct commit *commit; - struct commit_list *parents; - } *contains_stack; -}; - -static int in_commit_list(const struct commit_list *want, struct commit *c) -{ - for (; want; want = want->next) - if (!oidcmp(&want->item->object.oid, &c->object.oid)) - return 1; - return 0; -} - -/* - * Test whether the candidate is contained in the list. - * Do not recurse to find out, though, but return -1 if inconclusive. - */ -static enum contains_result contains_test(struct commit *candidate, - const struct commit_list *want, - struct contains_cache *cache, - uint32_t cutoff) -{ - enum contains_result *cached = contains_cache_at(cache, candidate); - - /* If we already have the answer cached, return that. */ - if (*cached) - return *cached; - - /* or are we it? */ - if (in_commit_list(want, candidate)) { - *cached = CONTAINS_YES; - return CONTAINS_YES; - } - - /* Otherwise, we don't know; prepare to recurse */ - parse_commit_or_die(candidate); - - if (candidate->generation < cutoff) - return CONTAINS_NO; - - return CONTAINS_UNKNOWN; -} - -static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack) -{ - ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc); - contains_stack->contains_stack[contains_stack->nr].commit = candidate; - contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents; -} - -static enum contains_result contains_tag_algo(struct commit *candidate, - const struct commit_list *want, - struct contains_cache *cache) -{ - struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result; - uint32_t cutoff = GENERATION_NUMBER_INFINITY; - const struct commit_list *p; - - for (p = want; p; p = p->next) { - struct commit *c = p->item; - load_commit_graph_info(the_repository, c); - if (c->generation < cutoff) - cutoff = c->generation; - } - - result = contains_test(candidate, want, cache, cutoff); - if (result != CONTAINS_UNKNOWN) - return result; - - push_to_contains_stack(candidate, &contains_stack); - while (contains_stack.nr) { - struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; - struct commit *commit = entry->commit; - struct commit_list *parents = entry->parents; - - if (!parents) { - *contains_cache_at(cache, commit) = CONTAINS_NO; - contains_stack.nr--; - } - /* - * If we just popped the stack, parents->item has been marked, - * therefore contains_test will return a meaningful yes/no. - */ - else switch (contains_test(parents->item, want, cache, cutoff)) { - case CONTAINS_YES: - *contains_cache_at(cache, commit) = CONTAINS_YES; - contains_stack.nr--; - break; - case CONTAINS_NO: - entry->parents = parents->next; - break; - case CONTAINS_UNKNOWN: - push_to_contains_stack(parents->item, &contains_stack); - break; - } - } - free(contains_stack.contains_stack); - return contains_test(candidate, want, cache, cutoff); -} - -static int commit_contains(struct ref_filter *filter, struct commit *commit, - struct commit_list *list, struct contains_cache *cache) -{ - if (filter->with_commit_tag_algo) - return contains_tag_algo(commit, list, cache) == CONTAINS_YES; - return is_descendant_of(commit, list); -} - /* * Return 1 if the refname matches one of the patterns, otherwise 0. * A pattern can be a literal prefix (e.g. a refname "refs/heads/master" @@ -1988,6 +1850,13 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } +struct ref_filter_cbdata { + struct ref_array *array; + struct ref_filter *filter; + struct contains_cache contains_cache; + struct contains_cache no_contains_cache; +}; + /* * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. -- cgit 1.2.3-korg From ba3ca1edce70d9d2d0eea1ac69377ae786e9413a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:13 +0000 Subject: commit-reach: move can_all_from_reach_with_flags There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The can_all_from_reach_with_flags method is used in a stateful way by upload-pack.c. The parameters are very flexible, so we will be able to use its commit walking logic for many other callers. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 14 ++++++++++++ object.h | 4 ++-- upload-pack.c | 70 +--------------------------------------------------------- 4 files changed, 80 insertions(+), 71 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index 01d796f011..d806291d5d 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -10,6 +10,7 @@ #include "commit-reach.h" /* Remember to update object flag allocation in object.h */ +#define REACHABLE (1u<<15) #define PARENT1 (1u<<16) #define PARENT2 (1u<<17) #define STALE (1u<<18) @@ -532,3 +533,65 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, return contains_tag_algo(commit, list, cache) == CONTAINS_YES; return is_descendant_of(commit, list); } + +int reachable(struct commit *from, unsigned int with_flag, + unsigned int assign_flag, time_t min_commit_date) +{ + struct prio_queue work = { compare_commits_by_commit_date }; + + prio_queue_put(&work, from); + while (work.nr) { + struct commit_list *list; + struct commit *commit = prio_queue_get(&work); + + if (commit->object.flags & with_flag) { + from->object.flags |= assign_flag; + break; + } + if (!commit->object.parsed) + parse_object(the_repository, &commit->object.oid); + if (commit->object.flags & REACHABLE) + continue; + commit->object.flags |= REACHABLE; + if (commit->date < min_commit_date) + continue; + for (list = commit->parents; list; list = list->next) { + struct commit *parent = list->item; + if (!(parent->object.flags & REACHABLE)) + prio_queue_put(&work, parent); + } + } + from->object.flags |= REACHABLE; + clear_commit_marks(from, REACHABLE); + clear_prio_queue(&work); + return (from->object.flags & assign_flag); +} + +int can_all_from_reach_with_flag(struct object_array *from, + unsigned int with_flag, + unsigned int assign_flag, + time_t min_commit_date) +{ + int i; + + for (i = 0; i < from->nr; i++) { + struct object *from_one = from->objects[i].item; + + if (from_one->flags & assign_flag) + continue; + from_one = deref_tag(the_repository, from_one, "a from object", 0); + if (!from_one || from_one->type != OBJ_COMMIT) { + /* no way to tell if this is reachable by + * looking at the ancestry chain alone, so + * leave a note to ourselves not to worry about + * this object anymore. + */ + from->objects[i].item->flags |= assign_flag; + continue; + } + if (!reachable((struct commit *)from_one, with_flag, assign_flag, + min_commit_date)) + return 0; + } + return 1; +} diff --git a/commit-reach.h b/commit-reach.h index 13dec25cee..b28bc22fcd 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -59,4 +59,18 @@ define_commit_slab(contains_cache, enum contains_result); int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache); +int reachable(struct commit *from, unsigned int with_flag, + unsigned int assign_flag, time_t min_commit_date); + +/* + * Determine if every commit in 'from' can reach at least one commit + * that is marked with 'with_flag'. As we traverse, use 'assign_flag' + * as a marker for commits that are already visited. Do not walk + * commits with date below 'min_commit_date'. + */ +int can_all_from_reach_with_flag(struct object_array *from, + unsigned int with_flag, + unsigned int assign_flag, + time_t min_commit_date); + #endif diff --git a/object.h b/object.h index 18c2b073e3..b132944c51 100644 --- a/object.h +++ b/object.h @@ -60,12 +60,12 @@ struct object_array { * revision.h: 0---------10 26 * fetch-pack.c: 0----5 * walker.c: 0-2 - * upload-pack.c: 4 11----------------19 + * upload-pack.c: 4 11-----14 16-----19 * builtin/blame.c: 12-13 * bisect.c: 16 * bundle.c: 16 * http-push.c: 16-----19 - * commit-reach.c: 16-----19 + * commit-reach.c: 15-------19 * sha1-name.c: 20 * list-objects-filter.c: 21 * builtin/fsck.c: 0--3 diff --git a/upload-pack.c b/upload-pack.c index 427de461d8..11c426685d 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -24,13 +24,13 @@ #include "quote.h" #include "upload-pack.h" #include "serve.h" +#include "commit-reach.h" /* Remember to update object flag allocation in object.h */ #define THEY_HAVE (1u << 11) #define OUR_REF (1u << 12) #define WANTED (1u << 13) #define COMMON_KNOWN (1u << 14) -#define REACHABLE (1u << 15) #define SHALLOW (1u << 16) #define NOT_SHALLOW (1u << 17) @@ -336,74 +336,6 @@ static int got_oid(const char *hex, struct object_id *oid) return 0; } -static int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date) -{ - struct prio_queue work = { compare_commits_by_commit_date }; - - prio_queue_put(&work, from); - while (work.nr) { - struct commit_list *list; - struct commit *commit = prio_queue_get(&work); - - if (commit->object.flags & with_flag) { - from->object.flags |= assign_flag; - break; - } - if (!commit->object.parsed) - parse_object(the_repository, &commit->object.oid); - if (commit->object.flags & REACHABLE) - continue; - commit->object.flags |= REACHABLE; - if (commit->date < min_commit_date) - continue; - for (list = commit->parents; list; list = list->next) { - struct commit *parent = list->item; - if (!(parent->object.flags & REACHABLE)) - prio_queue_put(&work, parent); - } - } - from->object.flags |= REACHABLE; - clear_commit_marks(from, REACHABLE); - clear_prio_queue(&work); - return (from->object.flags & assign_flag); -} - -/* - * Determine if every commit in 'from' can reach at least one commit - * that is marked with 'with_flag'. As we traverse, use 'assign_flag' - * as a marker for commits that are already visited. Do not walk - * commits with date below 'min_commit_date'. - */ -static int can_all_from_reach_with_flag(struct object_array *from, - unsigned int with_flag, - unsigned int assign_flag, - time_t min_commit_date) -{ - int i; - - for (i = 0; i < from->nr; i++) { - struct object *from_one = from->objects[i].item; - - if (from_one->flags & assign_flag) - continue; - from_one = deref_tag(the_repository, from_one, "a from object", 0); - if (!from_one || from_one->type != OBJ_COMMIT) { - /* no way to tell if this is reachable by - * looking at the ancestry chain alone, so - * leave a note to ourselves not to worry about - * this object anymore. - */ - from->objects[i].item->flags |= assign_flag; - continue; - } - if (!reachable((struct commit *)from_one, with_flag, assign_flag, - min_commit_date)) - return 0; - } - return 1; -} - static int ok_to_give_up(void) { if (!have_obj.nr) -- cgit 1.2.3-korg From 1792bc125069e3e5b59f0158e259335a07aa7cf5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:23 +0000 Subject: test-reach: test can_all_from_reach_with_flags The can_all_from_reach_with_flags method is used by ok_to_give_up in upload-pack.c to see if we have done enough negotiation during a fetch. This method is intentionally created to preserve state between calls to assist with stateful negotiation, such as over SSH. To make this method testable, add a new can_all_from_reach method that does the initial setup and final tear-down. We will later use this method in production code. Call the method from 'test-tool reach' for now. Since this is a many-to-many reachability query, add a new type of input to the 'test-tool reach' input format. Lines "Y:" create a list of commits to be the reachability targets from the commits in the 'X' list. In the context of fetch negotiation, the 'X' commits are the 'want' commits and the 'Y' commits are the 'have' commits. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 2 ++ t/helper/test-reach.c | 10 ++++++++-- t/t6600-test-reach.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 2 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index d806291d5d..940fbf2e17 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -595,3 +595,50 @@ int can_all_from_reach_with_flag(struct object_array *from, } return 1; } + +int can_all_from_reach(struct commit_list *from, struct commit_list *to, + int cutoff_by_min_date) +{ + struct object_array from_objs = OBJECT_ARRAY_INIT; + time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; + struct commit_list *from_iter = from, *to_iter = to; + int result; + + while (from_iter) { + add_object_array(&from_iter->item->object, NULL, &from_objs); + + if (!parse_commit(from_iter->item)) { + if (from_iter->item->date < min_commit_date) + min_commit_date = from_iter->item->date; + } + + from_iter = from_iter->next; + } + + while (to_iter) { + if (!parse_commit(to_iter->item)) { + if (to_iter->item->date < min_commit_date) + min_commit_date = to_iter->item->date; + } + + to_iter->item->object.flags |= PARENT2; + + to_iter = to_iter->next; + } + + result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1, + min_commit_date); + + while (from) { + clear_commit_marks(from->item, PARENT1); + from = from->next; + } + + while (to) { + clear_commit_marks(to->item, PARENT2); + to = to->next; + } + + object_array_clear(&from_objs); + return result; +} diff --git a/commit-reach.h b/commit-reach.h index b28bc22fcd..aa202c9703 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -72,5 +72,7 @@ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, time_t min_commit_date); +int can_all_from_reach(struct commit_list *from, struct commit_list *to, + int commit_date_cutoff); #endif diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index e32e193b70..c79729cac0 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -29,7 +29,7 @@ int cmd__reach(int ac, const char **av) { struct object_id oid_A, oid_B; struct commit *A, *B; - struct commit_list *X; + struct commit_list *X, *Y; struct commit **X_array; int X_nr, X_alloc; struct strbuf buf = STRBUF_INIT; @@ -41,7 +41,7 @@ int cmd__reach(int ac, const char **av) exit(1); A = B = NULL; - X = NULL; + X = Y = NULL; X_nr = 0; X_alloc = 16; ALLOC_ARRAY(X_array, X_alloc); @@ -86,6 +86,10 @@ int cmd__reach(int ac, const char **av) X_array[X_nr++] = c; break; + case 'Y': + commit_list_insert(c, &Y); + break; + default: die("unexpected start of line: %c", buf.buf[0]); } @@ -106,6 +110,8 @@ int cmd__reach(int ac, const char **av) struct commit_list *list = reduce_heads(X); printf("%s(X):\n", av[1]); print_sorted_commit_ids(list); + } else if (!strcmp(av[1], "can_all_from_reach")) { + printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1)); } exit(0); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 17c6467988..e41eb397a7 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -160,4 +160,49 @@ test_expect_success 'reduce_heads' ' test_three_modes reduce_heads ' +test_expect_success 'can_all_from_reach:hit' ' + cat >input <<-\EOF && + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + Y:commit-1-9 + Y:commit-2-8 + Y:commit-3-7 + Y:commit-4-6 + Y:commit-5-5 + Y:commit-6-4 + Y:commit-7-3 + Y:commit-8-1 + EOF + echo "can_all_from_reach(X,Y):1" >expect && + test_three_modes can_all_from_reach +' + +test_expect_success 'can_all_from_reach:miss' ' + cat >input <<-\EOF && + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + Y:commit-1-9 + Y:commit-2-8 + Y:commit-3-7 + Y:commit-4-6 + Y:commit-5-5 + Y:commit-6-4 + Y:commit-8-5 + EOF + echo "can_all_from_reach(X,Y):0" >expect && + test_three_modes can_all_from_reach +' + test_done -- cgit 1.2.3-korg From 1e3497a24cf13fe907b247d1b93a997d6537cca1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:27 +0000 Subject: commit-reach: replace ref_newer logic The ref_newer method is used by 'git push' to check if a force-push is required. This method does not use any kind of cutoff when walking, so in the case of a force-push will walk all reachable commits. The is_descendant_of method already uses paint_down_to_common along with cutoffs. By translating the ref_newer arguments into the commit and commit_list required by is_descendant_of, we can have one fewer commit walk and also improve our performance! For a copy of the Linux repository, 'test-tool reach ref_newer' presents the following improvements with the specified input. In the case that ref_newer returns 1, there is no improvement. The improvement is in the second case where ref_newer returns 0. Input: A:v4.9 B:v3.19 Before: 0.09 s After: 0.09 s To test the negative case, add a new commit with parent v3.19, regenerate the commit-graph, and then run with B pointing at that commit. Before: 0.43 s After: 0.09 s Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index 940fbf2e17..f5858944fd 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -366,20 +366,11 @@ void reduce_heads_replace(struct commit_list **heads) *heads = result; } -static void unmark_and_free(struct commit_list *list, unsigned int mark) -{ - while (list) { - struct commit *commit = pop_commit(&list); - commit->object.flags &= ~mark; - } -} - int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) { struct object *o; struct commit *old_commit, *new_commit; - struct commit_list *list, *used; - int found = 0; + struct commit_list *old_commit_list = NULL; /* * Both new_commit and old_commit must be commit-ish and new_commit is descendant of @@ -400,19 +391,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) if (parse_commit(new_commit) < 0) return 0; - used = list = NULL; - commit_list_insert(new_commit, &list); - while (list) { - new_commit = pop_most_recent_commit(&list, TMP_MARK); - commit_list_insert(new_commit, &used); - if (new_commit == old_commit) { - found = 1; - break; - } - } - unmark_and_free(list, TMP_MARK); - unmark_and_free(used, TMP_MARK); - return found; + commit_list_insert(old_commit, &old_commit_list); + return is_descendant_of(new_commit, old_commit_list); } /* -- cgit 1.2.3-korg From 4fbcca4effc1c6f8431120f88f5a4bd1c8e38ca3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:28 +0000 Subject: commit-reach: make can_all_from_reach... linear The can_all_from_reach_with_flags() algorithm is currently quadratic in the worst case, because it calls the reachable() method for every 'from' without tracking which commits have already been walked or which can already reach a commit in 'to'. Rewrite the algorithm to walk each commit a constant number of times. We also add some optimizations that should work for the main consumer of this method: fetch negotitation (haves/wants). The first step includes using a depth-first-search (DFS) from each 'from' commit, sorted by ascending generation number. We do not walk beyond the minimum generation number or the minimum commit date. This DFS is likely to be faster than the existing reachable() method because we expect previous ref values to be along the first-parent history. If we find a target commit, then we mark everything in the DFS stack as a RESULT. This expands the set of targets for the other 'from' commits. We also mark the visited commits using 'assign_flag' to prevent re- walking the same commits. We still need to clear our flags at the end, which is why we will have a total of three visits to each commit. Performance was measured on the Linux repository using 'test-tool reach can_all_from_reach'. The input included rows seeded by tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as v3.[0-9]*. This mimics a (very large) fetch that says "I have all major v3 releases and want all major v4 releases." The "large" case included X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate tags to the set, which does not greatly increase the number of objects that are considered, but does increase the number of 'from' commits, demonstrating the quadratic nature of the previous code. Small Case: Before: 1.52 s After: 0.26 s Large Case: Before: 3.50 s After: 0.27 s Note how the time increases between the two cases in the two versions. The new code increases relative to the number of commits that need to be walked, but not directly relative to the number of 'from' commits. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-reach.c | 122 +++++++++++++++++++++++++++++++++++---------------------- commit-reach.h | 9 ++--- upload-pack.c | 5 ++- 3 files changed, 83 insertions(+), 53 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index f5858944fd..bc522d6840 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -514,66 +514,87 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, return is_descendant_of(commit, list); } -int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date) +static int compare_commits_by_gen(const void *_a, const void *_b) { - struct prio_queue work = { compare_commits_by_commit_date }; + const struct commit *a = (const struct commit *)_a; + const struct commit *b = (const struct commit *)_b; - prio_queue_put(&work, from); - while (work.nr) { - struct commit_list *list; - struct commit *commit = prio_queue_get(&work); - - if (commit->object.flags & with_flag) { - from->object.flags |= assign_flag; - break; - } - if (!commit->object.parsed) - parse_object(the_repository, &commit->object.oid); - if (commit->object.flags & REACHABLE) - continue; - commit->object.flags |= REACHABLE; - if (commit->date < min_commit_date) - continue; - for (list = commit->parents; list; list = list->next) { - struct commit *parent = list->item; - if (!(parent->object.flags & REACHABLE)) - prio_queue_put(&work, parent); - } - } - from->object.flags |= REACHABLE; - clear_commit_marks(from, REACHABLE); - clear_prio_queue(&work); - return (from->object.flags & assign_flag); + if (a->generation < b->generation) + return -1; + if (a->generation > b->generation) + return 1; + return 0; } int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date) + time_t min_commit_date, + uint32_t min_generation) { + struct commit **list = NULL; int i; + int result = 1; + ALLOC_ARRAY(list, from->nr); for (i = 0; i < from->nr; i++) { - struct object *from_one = from->objects[i].item; + list[i] = (struct commit *)from->objects[i].item; - if (from_one->flags & assign_flag) - continue; - from_one = deref_tag(the_repository, from_one, "a from object", 0); - if (!from_one || from_one->type != OBJ_COMMIT) { - /* no way to tell if this is reachable by - * looking at the ancestry chain alone, so - * leave a note to ourselves not to worry about - * this object anymore. - */ - from->objects[i].item->flags |= assign_flag; - continue; - } - if (!reachable((struct commit *)from_one, with_flag, assign_flag, - min_commit_date)) + if (parse_commit(list[i]) || + list[i]->generation < min_generation) return 0; } - return 1; + + QSORT(list, from->nr, compare_commits_by_gen); + + for (i = 0; i < from->nr; i++) { + /* DFS from list[i] */ + struct commit_list *stack = NULL; + + list[i]->object.flags |= assign_flag; + commit_list_insert(list[i], &stack); + + while (stack) { + struct commit_list *parent; + + if (stack->item->object.flags & with_flag) { + pop_commit(&stack); + continue; + } + + for (parent = stack->item->parents; parent; parent = parent->next) { + if (parent->item->object.flags & (with_flag | RESULT)) + stack->item->object.flags |= RESULT; + + if (!(parent->item->object.flags & assign_flag)) { + parent->item->object.flags |= assign_flag; + + if (parse_commit(parent->item) || + parent->item->date < min_commit_date || + parent->item->generation < min_generation) + continue; + + commit_list_insert(parent->item, &stack); + break; + } + } + + if (!parent) + pop_commit(&stack); + } + + if (!(list[i]->object.flags & (with_flag | RESULT))) { + result = 0; + goto cleanup; + } + } + +cleanup: + for (i = 0; i < from->nr; i++) { + clear_commit_marks(list[i], RESULT); + clear_commit_marks(list[i], assign_flag); + } + return result; } int can_all_from_reach(struct commit_list *from, struct commit_list *to, @@ -583,6 +604,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; struct commit_list *from_iter = from, *to_iter = to; int result; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; while (from_iter) { add_object_array(&from_iter->item->object, NULL, &from_objs); @@ -590,6 +612,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, if (!parse_commit(from_iter->item)) { if (from_iter->item->date < min_commit_date) min_commit_date = from_iter->item->date; + + if (from_iter->item->generation < min_generation) + min_generation = from_iter->item->generation; } from_iter = from_iter->next; @@ -599,6 +624,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, if (!parse_commit(to_iter->item)) { if (to_iter->item->date < min_commit_date) min_commit_date = to_iter->item->date; + + if (to_iter->item->generation < min_generation) + min_generation = to_iter->item->generation; } to_iter->item->object.flags |= PARENT2; @@ -607,7 +635,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, } result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1, - min_commit_date); + min_commit_date, min_generation); while (from) { clear_commit_marks(from->item, PARENT1); diff --git a/commit-reach.h b/commit-reach.h index aa202c9703..7d313e2975 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -59,19 +59,18 @@ define_commit_slab(contains_cache, enum contains_result); int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache); -int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date); - /* * Determine if every commit in 'from' can reach at least one commit * that is marked with 'with_flag'. As we traverse, use 'assign_flag' * as a marker for commits that are already visited. Do not walk - * commits with date below 'min_commit_date'. + * commits with date below 'min_commit_date' or generation below + * 'min_generation'. */ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date); + time_t min_commit_date, + uint32_t min_generation); int can_all_from_reach(struct commit_list *from, struct commit_list *to, int commit_date_cutoff); diff --git a/upload-pack.c b/upload-pack.c index 11c426685d..1e498f1188 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -338,11 +338,14 @@ static int got_oid(const char *hex, struct object_id *oid) static int ok_to_give_up(void) { + uint32_t min_generation = GENERATION_NUMBER_ZERO; + if (!have_obj.nr) return 0; return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, - COMMON_KNOWN, oldest_have); + COMMON_KNOWN, oldest_have, + min_generation); } static int get_common_commits(void) -- cgit 1.2.3-korg From 6cc017431c1c48f80d1c6512fdcc9866cf4b7f55 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Jul 2018 16:33:30 +0000 Subject: commit-reach: use can_all_from_reach The is_descendant_of method previously used in_merge_bases() to check if the commit can reach any of the commits in the provided list. This had two performance problems: 1. The performance is quadratic in worst-case. 2. A single in_merge_bases() call requires walking beyond the target commit in order to find the full set of boundary commits that may be merge-bases. The can_all_from_reach method avoids this quadratic behavior and can limit the search beyond the target commits using generation numbers. It requires a small prototype adjustment to stop using commit-date as a cutoff, as that optimization is no longer appropriate here. Since in_merge_bases() uses paint_down_to_common(), is_descendant_of() naturally found cutoffs to avoid walking the entire commit graph. Since we want to always return the correct result, we cannot use the min_commit_date cutoff in can_all_from_reach. We then rely on generation numbers to provide the cutoff. Since not all repos will have a commit-graph file, nor will we always have generation numbers computed for a commit-graph file, create a new method, generation_numbers_enabled(), that checks for a commit-graph file and sees if the first commit in the file has a non-zero generation number. In the case that we do not have generation numbers, use the old logic for is_descendant_of(). Performance was meausured on a copy of the Linux repository using the 'test-tool reach is_descendant_of' command using this input: A:v4.9 X:v4.10 X:v4.11 X:v4.12 X:v4.13 X:v4.14 X:v4.15 X:v4.16 X:v4.17 X.v3.0 Note that this input is tailored to demonstrate the quadratic nature of the previous method, as it will compute merge-bases for v4.9 versus all of the later versions before checking against v4.1. Before: 0.26 s After: 0.21 s Since we previously used the is_descendant_of method in the ref_newer method, we also measured performance there using 'test-tool reach ref_newer' with this input: A:v4.9 B:v3.19 Before: 0.10 s After: 0.08 s By adding a new commit with parent v3.19, we test the non-reachable case of ref_newer: Before: 0.09 s After: 0.08 s Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 18 ++++++++++++++++++ commit-graph.h | 6 ++++++ commit-reach.c | 24 +++++++++++++++++------- 3 files changed, 41 insertions(+), 7 deletions(-) (limited to 'commit-reach.c') diff --git a/commit-graph.c b/commit-graph.c index b0a55ad128..e9786fa864 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -233,6 +233,24 @@ static int prepare_commit_graph(struct repository *r) return !!r->objects->commit_graph; } +int generation_numbers_enabled(struct repository *r) +{ + uint32_t first_generation; + struct commit_graph *g; + if (!prepare_commit_graph(r)) + return 0; + + g = r->objects->commit_graph; + + if (!g->num_commits) + return 0; + + first_generation = get_be32(g->chunk_commit_data + + g->hash_len + 8) >> 2; + + return !!first_generation; +} + static void close_commit_graph(void) { free_commit_graph(the_repository->objects->commit_graph); diff --git a/commit-graph.h b/commit-graph.h index 76e098934a..0de8f88316 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -51,6 +51,12 @@ struct commit_graph { struct commit_graph *load_commit_graph_one(const char *graph_file); +/* + * Return 1 if and only if the repository has a commit-graph + * file and generation numbers are computed in that file. + */ +int generation_numbers_enabled(struct repository *r); + void write_commit_graph_reachable(const char *obj_dir, int append); void write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, diff --git a/commit-reach.c b/commit-reach.c index bc522d6840..c996524032 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -277,15 +277,25 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit) { if (!with_commit) return 1; - while (with_commit) { - struct commit *other; - other = with_commit->item; - with_commit = with_commit->next; - if (in_merge_bases(other, commit)) - return 1; + if (generation_numbers_enabled(the_repository)) { + struct commit_list *from_list = NULL; + int result; + commit_list_insert(commit, &from_list); + result = can_all_from_reach(from_list, with_commit, 0); + free_commit_list(from_list); + return result; + } else { + while (with_commit) { + struct commit *other; + + other = with_commit->item; + with_commit = with_commit->next; + if (in_merge_bases(other, commit)) + return 1; + } + return 0; } - return 0; } /* -- cgit 1.2.3-korg From 6621c838743812aaba96e55cfec8524ea1144c2d Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 28 Aug 2018 14:36:57 -0700 Subject: commit-reach: correct accidental #include of C file Without this change, the build breaks with clang: libgit/ref-filter.pic.o: multiple definition of 'filter_refs' libgit/commit-reach.pic.o: previous definition here Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- commit-reach.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'commit-reach.c') diff --git a/commit-reach.c b/commit-reach.c index c996524032..86715c103c 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -4,7 +4,7 @@ #include "decorate.h" #include "prio-queue.h" #include "tree.h" -#include "ref-filter.c" +#include "ref-filter.h" #include "revision.h" #include "tag.h" #include "commit-reach.h" -- cgit 1.2.3-korg