summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2006-07-27 06:09:15 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2006-07-27 06:09:15 -0400
commita6d59b2238d465f4d785cfe7172cec247604832b (patch)
tree643251890b7a618a54213c331c43f55d9a9f2127
downloadremap-a6d59b2238d465f4d785cfe7172cec247604832b.tar.gz
Initial revision
-rwxr-xr-xdiff-remap-data2
-rwxr-xr-xgit-remap-data2
-rw-r--r--remap-log.c499
-rw-r--r--what-it-does52
4 files changed, 555 insertions, 0 deletions
diff --git a/diff-remap-data b/diff-remap-data
new file mode 100755
index 0000000..1b5fac1
--- /dev/null
+++ b/diff-remap-data
@@ -0,0 +1,2 @@
+#!/bin/sh
+diff -rN -U 0 $1 $2|remap-log -O$1 -N$2
diff --git a/git-remap-data b/git-remap-data
new file mode 100755
index 0000000..e7a887d
--- /dev/null
+++ b/git-remap-data
@@ -0,0 +1,2 @@
+#!/bin/sh
+GIT_DIFF_OPTS="-u 0" git diff -M "$@"
diff --git a/remap-log.c b/remap-log.c
new file mode 100644
index 0000000..e8dacf0
--- /dev/null
+++ b/remap-log.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2006, Al Viro. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+/* Path prefixes expected on the two names of a "diff" line (set by -O / -N). */
+char *prefix1 = "a/";
+char *prefix2 = "b/";
+
+/*
+ * Log-line prefixes: the one required on input lines (-p) and the ones put
+ * on lines that could not (-o) or could (-n) be mapped to the new tree.
+ */
+char *from_prefix = "";
+char *old_prefix = "O:";
+char *new_prefix = "N:";
+size_t from_len = 0;	/* length of from_prefix */
+
+/* Current input line and the allocated size of its buffer. */
+char *line;
+size_t size;
+
+/*
+ * Print "remap: <s>" on stderr and terminate the process with status 1.
+ * Does not return.
+ */
+void die(char *s)
+{
+	(void)fprintf(stderr, "remap: %s\n", s);
+	exit(1);
+}
+
+void Enomem(void)
+{
+ die("out of memory");
+}
+
+void Eio(void)
+{
+ die("IO error");
+}
+
+/*
+ * Read the next line of 'f' into the global buffer 'line', doubling the
+ * buffer (and 'size') whenever it fills up before a newline is seen.
+ * The trailing newline is removed; a final line without one is kept as is.
+ * Returns 1 when a line was read and 0 at end of file; a read error that
+ * is not end-of-file terminates the program through Eio().
+ */
+int getline(FILE *f)
+{
+	size_t used;	/* characters of the current line stored so far */
+
+	if (fgets(line, size, f) == NULL) {
+		if (!feof(f))
+			Eio();
+		return 0;
+	}
+	used = strlen(line);
+	while (line[used - 1] != '\n') {
+		/* buffer completely full: grow it before reading the rest */
+		if (used == size - 1) {
+			line = realloc(line, 2 * size);
+			if (line == NULL)
+				Enomem();
+			size *= 2;
+		}
+		if (fgets(line + used, size - used, f) == NULL) {
+			if (!feof(f))
+				Eio();
+			/* EOF in the middle of a line: return what we have */
+			return 1;
+		}
+		used += strlen(line + used);
+	}
+	line[used - 1] = '\0';
+	return 1;
+}
+
+/*
+ * One entry of a file's line map: old lines from 'from' up to the next
+ * entry's 'from' now start at 'to'.  to == 0 marks a deletion.
+ */
+struct range_map {
+	int from;
+	int to;
+};
+
+/* Line map of one old file; entries sharing a hash bucket are chained. */
+struct file_map {
+	char *name;		/* file name in the old tree (lookup key) */
+	struct file_map *next;	/* next map in the same hash bucket */
+	char *new_name;		/* name in the new tree, NULL if the file is gone */
+	int count;		/* number of ranges[] entries in use */
+	int allocated;		/* number of ranges[] entries allocated */
+	int last;		/* index of the range found by the previous lookup */
+	struct range_map ranges[];
+};
+
+/*
+ * Allocate an empty map for the file 'name' with room for 16 ranges.
+ * Both name and new_name point at the same private copy of 'name'.
+ * Terminates the program through Enomem() if memory runs out.
+ */
+struct file_map *alloc_map(char *name)
+{
+	enum { INITIAL_RANGES = 16 };
+	struct file_map *m;
+	char *copy;
+
+	m = malloc(sizeof(*m) + INITIAL_RANGES * sizeof(m->ranges[0]));
+	if (m == NULL)
+		Enomem();
+	copy = strdup(name);
+	if (copy == NULL)
+		Enomem();
+	m->name = copy;
+	m->new_name = copy;
+	m->next = NULL;
+	m->count = 0;
+	m->allocated = INITIAL_RANGES;
+	m->last = 0;
+	return m;
+}
+
+/*
+ * 32-bit FNV-1 hash of the NUL-terminated string 'name': for every byte,
+ * multiply by the FNV prime and then XOR the byte in.
+ */
+uint32_t FNV_hash(char *name)
+{
+	const uint32_t fnv_prime = 0x01000193;
+	uint32_t h = 0x811c9dc5;	/* FNV offset basis */
+	const unsigned char *p;
+
+	for (p = (const unsigned char *)name; *p != '\0'; p++)
+		h = (h * fnv_prime) ^ *p;
+	return h;
+}
+
+/* Hash table of file maps keyed by old file name; buckets are chained via ->next. */
+struct file_map *hash[1024];
+
+/*
+ * Append 'map' to the end of its bucket.
+ * Returns 0 if a map with the same name is already present (nothing is
+ * added then).  Otherwise the map is linked in and the result is 1, except
+ * that a file which still exists (new_name != NULL) must have a non-empty
+ * range list starting at line 1, or 0 is returned.
+ */
+int hash_map(struct file_map *map)
+{
+	struct file_map **link;
+
+	for (link = &hash[FNV_hash(map->name) % 1024]; *link; link = &(*link)->next) {
+		if (strcmp((*link)->name, map->name) == 0)
+			return 0;
+	}
+	*link = map;
+
+	/* deleted files carry no ranges, so there is nothing more to check */
+	if (!map->new_name)
+		return 1;
+	return map->count && map->ranges[0].from == 1;
+}
+
+/*
+ * Look up the map for old file 'name'; returns NULL when there is none.
+ * The most recent successful lookup is remembered and checked first.
+ */
+struct file_map *find_map(char *name)
+{
+	static struct file_map *last = NULL;
+	struct file_map *p;
+
+	if (last && !strcmp(last->name, name))
+		return last;
+
+	p = hash[FNV_hash(name) % 1024];
+	while (p && strcmp(p->name, name))
+		p = p->next;
+	if (p)
+		last = p;
+	return p;
+}
+
+/*
+ * Load the map file 'name' into the hash table.  Recognised lines:
+ *	D <name>		old file <name> is gone
+ *	M <old> <new>		header of the range list of a surviving file
+ *	<from> <to>		one range of the file named by the last M line
+ * Names may not contain spaces and 'from' values must be strictly
+ * increasing.  A file entry is put into the hash table when the next
+ * header line (or the end of the file) is reached.
+ * Terminates the program with "can't open map" or "bad map" on failure.
+ */
+void parse_map(char *name)
+{
+	struct file_map *cur = NULL;
+	struct range_map *r;
+	FILE *f = fopen(name, "r");
+
+	if (!f)
+		die("can't open map");
+
+	while (getline(f)) {
+		char kind = line[0];
+
+		if (kind == 'D' || kind == 'M') {
+			char *old_name = line + 2;
+			char *renamed = NULL;
+
+			/* a header line completes the previous file's entry */
+			if (cur && !hash_map(cur))
+				goto Ebadmap;
+			if (line[1] != ' ')
+				goto Ebadmap;
+			if (kind == 'M') {
+				renamed = strchr(old_name, ' ');
+				if (!renamed)
+					goto Ebadmap;
+				*renamed++ = '\0';
+				if (strchr(renamed, ' '))
+					goto Ebadmap;
+			} else if (strchr(old_name, ' ')) {
+				goto Ebadmap;
+			}
+			cur = alloc_map(old_name);
+			if (kind == 'D') {
+				cur->new_name = NULL;
+			} else if (strcmp(old_name, renamed) != 0) {
+				cur->new_name = strdup(renamed);
+				if (!cur->new_name)
+					Enomem();
+			}
+			continue;
+		}
+
+		/* anything else is a range and needs a preceding M header */
+		if (!cur || !cur->new_name)
+			goto Ebadmap;
+		if (cur->count == cur->allocated) {
+			int grown = 2 * cur->allocated;
+
+			cur = realloc(cur, sizeof(*cur) +
+					grown * sizeof(cur->ranges[0]));
+			if (!cur)
+				Enomem();
+			cur->allocated = grown;
+		}
+		r = &cur->ranges[cur->count++];
+		if (sscanf(line, "%d %d%*c", &r->from, &r->to) != 2)
+			goto Ebadmap;
+		if (cur->count > 1 && r->from <= r[-1].from)
+			goto Ebadmap;
+	}
+	if (cur && !hash_map(cur))
+		goto Ebadmap;
+	fclose(f);
+	return;
+Ebadmap:
+	die("bad map");
+}
+
+/*
+ * Return the last range of 'map' whose 'from' does not exceed line 'l'.
+ * The search resumes from the range returned by the previous call when
+ * that one is not already past 'l', and the result is cached in map->last.
+ */
+struct range_map *find_range(struct file_map *map, int l)
+{
+	struct range_map *const base = map->ranges;
+	struct range_map *p = base + map->last;
+
+	if (p->from <= l && base[map->count - 1].from <= l) {
+		/* at or beyond the final range: no scan needed */
+		p = &base[map->count - 1];
+	} else {
+		/* cached range lies beyond l: restart from the first one */
+		if (p->from > l)
+			p = base;
+		while (p->from <= l)
+			p++;
+		p--;
+	}
+	map->last = p - base;
+	return p;
+}
+
+/*
+ * Translate the current input line and print the result on stdout.
+ * A line of the form <from_prefix><file>:<number>:<text>, with no space
+ * in <file> and 1 <= number <= INT_MAX, is printed as
+ *	<new_prefix><new-file>:<new-number>:<text>
+ * when the file has no map entry or the line maps to a surviving range,
+ * and as
+ *	<old_prefix><file>:<number>:<text>
+ * when the file or the line is gone.  Any other line is printed unchanged.
+ */
+void mapline(void)
+{
+	struct file_map *map;
+	struct range_map *range;
+	unsigned long lineno;
+	char *name, *colon, *space, *end, *text;
+
+	if (strncmp(line, from_prefix, from_len) != 0)
+		goto noise;
+	name = line + from_len;
+	colon = strchr(name, ':');
+	if (colon == NULL)
+		goto noise;
+	space = strchr(name, ' ');
+	if (space != NULL && space < colon)
+		goto noise;
+	lineno = strtoul(colon + 1, &end, 10);
+	if (end == colon + 1 || *end != ':' || lineno == 0 || lineno > INT_MAX)
+		goto noise;
+
+	/* cut the line into name / number / text in place */
+	*colon = '\0';
+	*end = '\0';
+	text = end + 1;
+
+	map = find_map(name);
+	if (map != NULL) {
+		if (map->new_name == NULL)
+			goto old;
+		range = find_range(map, lineno);
+		if (range->to == 0)
+			goto old;
+		name = map->new_name;
+		lineno += range->to - range->from;
+	}
+	printf("%s%s:%lu:%s\n", new_prefix, name, lineno, text);
+	return;
+old:
+	/* put both separators back and print the original location */
+	*colon = ':';
+	text[-1] = ':';
+	printf("%s%s\n", old_prefix, line + from_len);
+	return;
+noise:
+	printf("%s\n", line);
+}
+
+/*
+ * Parse "<start>[,<count>]" at 'text'.  A missing count means 1; with a
+ * count of 0 the start is moved one line further.  Returns the position
+ * right after the parsed text, or NULL if it is malformed or a number
+ * exceeds INT_MAX.
+ */
+static char *parse_span(char *text, int *start, int *count)
+{
+	char *end, *after;
+	unsigned long v;
+
+	v = strtoul(text, &end, 10);
+	if (end == text || v > INT_MAX)
+		return NULL;
+	*start = v;
+	if (*end != ',') {
+		*count = 1;
+		return end;
+	}
+	v = strtoul(end + 1, &after, 10);
+	if (after == end + 1 || v > INT_MAX)
+		return NULL;
+	*count = v;
+	if (v == 0)
+		(*start)++;
+	return after;
+}
+
+/*
+ * Parse the hunk header "@@ -l1[,n1] +l2[,n2] ..." held in 'line'.
+ * Stores the old start/length in *l1 / *n1 and the new ones in *l2 / *n2.
+ * Returns 1 on success and 0 if the header is malformed.
+ */
+int parse_hunk(int *l1, int *l2, int *n1, int *n2)
+{
+	char *p;
+
+	if (line[3] != '-')
+		return 0;
+	p = parse_span(line + 4, l1, n1);
+	if (p == NULL || p[0] != ' ' || p[1] != '+')
+		return 0;
+	return parse_span(p + 2, l2, n2) != NULL;
+}
+
+void parse_diff(void)
+{
+ int skipping = -1, suppress = 1;
+ char *name1 = NULL, *name2 = NULL;
+ int from = 1, to = 1;
+ int l1, l2, n1, n2;
+ enum cmd {
+ Diff, Hunk, New, Del, Copy, Rename, Junk
+ } cmd;
+ static struct { const char *s; size_t len; } pref[] = {
+ [Hunk] = {"@@ ", 3},
+ [Diff] = {"diff ", 5},
+ [New] = {"new file ", 9},
+ [Del] = {"deleted file ", 12},
+ [Copy] = {"copy from ", 10},
+ [Rename] = {"rename from ", 11},
+ [Junk] = {"", 0},
+ };
+ size_t len1 = strlen(prefix1), len2 = strlen(prefix2);
+
+ while (getline(stdin)) {
+ if (skipping > 0) {
+ switch (line[0]) {
+ case '+':
+ case '-':
+ case '\\':
+ continue;
+ }
+ }
+ for (cmd = 0; strncmp(line, pref[cmd].s, pref[cmd].len); cmd++)
+ ;
+ switch (cmd) {
+ case Hunk:
+ if (skipping < 0)
+ goto Ediff;
+ if (!suppress) {
+ if (!skipping)
+ printf("M %s %s\n", name1, name2);
+ if (!parse_hunk(&l1, &l2, &n1, &n2))
+ goto Ediff;
+ if (l1 > from)
+ printf("%d %d\n", from, to);
+ if (n1)
+ printf("%d 0\n", l1);
+ from = l1 + n1;
+ to = l2 + n2;
+ }
+ skipping = 1;
+ break;
+ case Diff:
+ if (!suppress) {
+ if (!skipping)
+ printf("M %s %s\n", name1, name2);
+ printf("%d %d\n", from, to);
+ }
+ free(name1);
+ free(name2);
+ name2 = strrchr(line, ' ');
+ if (!name2)
+ goto Ediff;
+ *name2 = '\0';
+ name1 = strrchr(line, ' ');
+ if (!name1)
+ goto Ediff;
+ if (strncmp(name1 + 1, prefix1, len1))
+ goto Ediff;
+ if (strncmp(name2 + 1, prefix2, len2))
+ goto Ediff;
+ name1 = strdup(name1 + len1 + 1);
+ name2 = strdup(name2 + len2 + 1);
+ if (!name1 || !name2)
+ goto Ediff;
+ skipping = 0;
+ suppress = 0;
+ from = to = 1;
+ break;
+ case New:
+ if (skipping)
+ goto Ediff;
+ suppress = 1;
+ break;
+ case Del:
+ case Copy:
+ if (skipping)
+ goto Ediff;
+ printf("D %s\n", name2);
+ suppress = 1;
+ break;
+ case Rename:
+ if (skipping)
+ goto Ediff;
+ printf("D %s\n", name2);
+ break;
+ default:
+ break;
+ }
+ }
+ return;
+Ediff:
+ die("odd diff");
+}
+
+/*
+ * Return a newly allocated copy of the 'len'-character string 'dir' with a
+ * '/' appended.  Terminates the program through Enomem() on failure.
+ */
+static char *dir_prefix(const char *dir, size_t len)
+{
+	char *p = malloc(len + 2);
+
+	if (!p)
+		Enomem();
+	memcpy(p, dir, len);
+	p[len] = '/';
+	p[len + 1] = '\0';
+	return p;
+}
+
+/*
+ * usage: remap-log [-O dir] [-N dir] [-p prefix] [-o prefix] [-n prefix] [map]
+ *
+ * Without a map argument a diff is read from stdin and a map is written to
+ * stdout; with one, stdin is filtered through that map.
+ *	-O dir		old-tree directory in "diff" lines ("dir/" is matched)
+ *	-N dir		new-tree directory in "diff" lines ("dir/" is matched)
+ *	-p prefix	prefix required on input lines that are to be mapped
+ *	-o prefix	prefix put on lines that did not survive
+ *	-n prefix	prefix put on lines that were mapped to the new tree
+ * An option's value may be attached ("-Odir") or given as the next argument.
+ */
+int main(int argc, char **argv)
+{
+	char *map_name = NULL;
+	char opt;
+	char *arg;
+	size_t len;
+
+	size = 256;
+	line = malloc(size);
+	if (!line)
+		Enomem();
+	for (argc--, argv++; argc; argc--, argv++) {
+		if (argv[0][0] != '-') {
+			map_name = argv[0];
+			continue;
+		}
+		opt = argv[0][1];
+		if (!opt)
+			die("bad arguments");
+		arg = argv[0] + 2;
+		if (!*arg) {
+			/* value is in the next argument */
+			if (!--argc)
+				die("bad arguments");
+			arg = *++argv;
+		}
+		len = strlen(arg);
+		switch (opt) {
+		case 'O':
+			prefix1 = dir_prefix(arg, len);
+			break;
+		case 'N':
+			prefix2 = dir_prefix(arg, len);
+			break;
+		case 'p':
+			from_prefix = arg;
+			from_len = len;
+			break;
+		case 'o':
+			old_prefix = arg;
+			break;
+		case 'n':
+			new_prefix = arg;
+			break;
+		default:
+			die("bad arguments");
+		}
+	}
+
+	if (!map_name) {
+		parse_diff();
+	} else {
+		parse_map(map_name);
+		while (getline(stdin))
+			mapline();
+	}
+	/* a map or log cut short by a write error must not look like success */
+	if (fflush(stdout) == EOF || ferror(stdout))
+		Eio();
+	return 0;
+}
diff --git a/what-it-does b/what-it-does
new file mode 100644
index 0000000..f0030d5
--- /dev/null
+++ b/what-it-does
@@ -0,0 +1,52 @@
+Use:
+ diff-remap-data <dir1> <dir2> >map
+or
+ git-remap-data <git-diff arguments> >map
+will build information for remapper.
+
+remap-log is a filter. It takes a map as its argument and, in the simplest
+form, looks at the lines on stdin that have the form
+<filename>:<number>:<text>
+If the indicated line from old tree had survived into the new one, we will
+get
+N:<new-filename>:<new-number>:<text>
+on the output. If it hadn't, we get
+O:<filename>:<number>:<text>
+Lines that do not have such form are passed unchanged.
+
+Even that is already very useful for log comparison. E.g. if old-log is
+from the old tree and new-log is from the new one, we can do
+ remap-log map <old-log >foo
+ remap-log /dev/null <new-log >bar
+ diff -u foo bar
+and have the noise due to line number changes excluded (empty map means
+identity mapping, so the second line will simply slap N: on all lines of
+form <filename>:<number>:<text> in new-log).
+
+Note that it's not just for build logs; the thing is useful for sparse logs,
+grep -n output, etc., etc.
+
+Behaviour described above is the default; what _really_ happens is
+that we take lines of form
+<original_prefix><filename>:<number>:<text>
+and replace them with
+<prefix_for_new><new-filename>:<new-number>:<text>
+or
+<prefix_for_old><filename>:<number>:<text>
+Defaults are "", "N:" and "O:" resp. (changed with -p, -n and -o); what it
+gives us is the ability to do multiple remappings. IOW, we can say
+
+diff-remap-data old-tree newer-tree > map1
+diff-remap-data newer-tree current-tree > map2
+remap-log -o old: map1 <old-log | remap-log -p N: -o newer: -n current: map2 >foo
+
+and get three kinds of lines: those that didn't make it into the newer tree
+are marked with old: and are otherwise unchanged; those that made it into
+newer but not into current are marked with newer: and have their
+filenames/line numbers remapped to the newer tree; and those that made it
+all the way are marked with current: and are remapped all the way to the
+current tree.
+
+That's quite useful when you want to carry logs for a while, basically using
+them as annotated TODO ("logs" here can very well be results of grep -n with
+annotations added to them). You can have all still relevant bits stay with
+the locations in text and see what had fallen out.