aboutsummaryrefslogtreecommitdiffstats
path: root/diff-helper.c
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2005-05-17 23:29:49 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-18 11:16:24 -0700
commit915838c3cbf5695e754b2826ee5e8fd46b23b0b7 (patch)
treebc893d0dd849a227a7ffcdffdb0a545232c755b2 /diff-helper.c
parente7bd907db6a707481066f2c8d58e208110824358 (diff)
downloadgit-915838c3cbf5695e754b2826ee5e8fd46b23b0b7.tar.gz
[PATCH] Diff-helper update
This patch adds a framework and a stub implementation of rename detection to diff-helper program. The current stub code is just enough to detect pure renames in diff-tree output and not fancier. The plan is perhaps to use the same delta code when Nico's delta storage patch is merged for similarity evaluation purposes. Signed-off-by: Junio C Hamano <junkio@cox.net> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'diff-helper.c')
-rw-r--r--diff-helper.c141
1 files changed, 138 insertions, 3 deletions
diff --git a/diff-helper.c b/diff-helper.c
index cd0bad06ba..3ef57060d6 100644
--- a/diff-helper.c
+++ b/diff-helper.c
@@ -21,6 +21,129 @@ static int matches_pathspec(const char *name, const char **spec, int cnt)
return 0;
}
+static int detect_rename = 0;
+
+/*
+ * We do not detect circular renames. Just hold created and deleted
+ * entries and later attempt to match them up. If they do not match,
+ * then spit them out as deletes or creates as original.
+ */
+
+static struct diff_spec_hold {
+ struct diff_spec_hold *next;
+ struct diff_spec_hold *matched;
+ struct diff_spec old, new;
+ char path[1];
+} *createdfile, *deletedfile;
+
+static void hold_spec(const char *path,
+ struct diff_spec *old, struct diff_spec *new)
+{
+ struct diff_spec_hold **list, *elem;
+ list = (! old->file_valid) ? &createdfile : &deletedfile;
+ elem = xmalloc(sizeof(*elem) + strlen(path));
+ strcpy(elem->path, path);
+ elem->next = *list;
+ *list = elem;
+ elem->old = *old;
+ elem->new = *new;
+ elem->matched = 0;
+}
+
+#define MINIMUM_SCORE 7000
+int estimate_similarity(struct diff_spec *one, struct diff_spec *two)
+{
+ /* Return how similar they are, representing the score as an
+ * integer between 0 and 10000.
+ *
+ * This version is very dumb and detects exact matches only.
+ * Wnen Nico's delta stuff gets in, I'll use the delta
+ * algorithm to estimate the similarity score in core.
+ */
+
+ if (one->sha1_valid && two->sha1_valid &&
+ !memcmp(one->blob_sha1, two->blob_sha1, 20))
+ return 10000;
+ return 0;
+}
+
+static void flush_renames(const char **spec, int cnt, int reverse)
+{
+ struct diff_spec_hold *rename_src, *rename_dst, *elem;
+ struct diff_spec_hold *leftover = NULL;
+ int score, best_score;
+
+ while (createdfile) {
+ rename_dst = createdfile;
+ createdfile = rename_dst->next;
+ best_score = MINIMUM_SCORE;
+ rename_src = NULL;
+ for (elem = deletedfile;
+ elem;
+ elem = elem->next) {
+ if (elem->matched)
+ continue;
+ score = estimate_similarity(&elem->old,
+ &rename_dst->new);
+ if (best_score < score) {
+ rename_src = elem;
+ best_score = score;
+ }
+ }
+ if (rename_src) {
+ rename_src->matched = rename_dst;
+ rename_dst->matched = rename_src;
+
+ if (!cnt ||
+ matches_pathspec(rename_src->path, spec, cnt) ||
+ matches_pathspec(rename_dst->path, spec, cnt)) {
+ if (reverse)
+ run_external_diff(rename_dst->path,
+ rename_src->path,
+ &rename_dst->new,
+ &rename_src->old);
+ else
+ run_external_diff(rename_src->path,
+ rename_dst->path,
+ &rename_src->old,
+ &rename_dst->new);
+ }
+ }
+ else {
+ rename_dst->next = leftover;
+ leftover = rename_dst;
+ }
+ }
+
+ /* unmatched deletes */
+ for (elem = deletedfile; elem; elem = elem->next) {
+ if (elem->matched)
+ continue;
+ if (!cnt ||
+ matches_pathspec(elem->path, spec, cnt)) {
+ if (reverse)
+ run_external_diff(elem->path, NULL,
+ &elem->new, &elem->old);
+ else
+ run_external_diff(elem->path, NULL,
+ &elem->old, &elem->new);
+ }
+ }
+
+ /* unmatched creates */
+ for (elem = leftover; elem; elem = elem->next) {
+ if (!cnt ||
+ matches_pathspec(elem->path, spec, cnt)) {
+ if (reverse)
+ run_external_diff(elem->path, NULL,
+ &elem->new, &elem->old);
+ else
+ run_external_diff(elem->path, NULL,
+ &elem->old, &elem->new);
+ }
+ }
+}
+
static int parse_oneside_change(const char *cp, struct diff_spec *one,
char *path)
{
@@ -100,17 +223,24 @@ static int parse_diff_raw_output(const char *buf,
default:
return -1;
}
+
+ if (detect_rename && old.file_valid != new.file_valid) {
+ /* hold these */
+ hold_spec(path, &old, &new);
+ return 0;
+ }
+
if (!cnt || matches_pathspec(path, spec, cnt)) {
if (reverse)
- run_external_diff(path, &new, &old);
+ run_external_diff(path, NULL, &new, &old);
else
- run_external_diff(path, &old, &new);
+ run_external_diff(path, NULL, &old, &new);
}
return 0;
}
static const char *diff_helper_usage =
-"git-diff-helper [-R] [-z] paths...";
+ "git-diff-helper [-r] [-R] [-z] paths...";
int main(int ac, const char **av) {
struct strbuf sb;
@@ -124,6 +254,8 @@ int main(int ac, const char **av) {
reverse = 1;
else if (av[1][1] == 'z')
line_termination = 0;
+ else if (av[1][1] == 'r')
+ detect_rename = 1;
else
usage(diff_helper_usage);
ac--; av++;
@@ -139,5 +271,8 @@ int main(int ac, const char **av) {
if (status)
fprintf(stderr, "cannot parse %s\n", sb.buf);
}
+
+ if (detect_rename)
+ flush_renames(av+1, ac-1, reverse);
return 0;
}