diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2006-07-27 06:09:15 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2006-07-27 06:09:15 -0400 |
commit | a6d59b2238d465f4d785cfe7172cec247604832b (patch) | |
tree | 643251890b7a618a54213c331c43f55d9a9f2127 | |
download | remap-a6d59b2238d465f4d785cfe7172cec247604832b.tar.gz |
Initial revision
-rwxr-xr-x | diff-remap-data | 2 | ||||
-rwxr-xr-x | git-remap-data | 2 | ||||
-rw-r--r-- | remap-log.c | 499 | ||||
-rw-r--r-- | what-it-does | 52 |
4 files changed, 555 insertions, 0 deletions
diff --git a/diff-remap-data b/diff-remap-data new file mode 100755 index 0000000..1b5fac1 --- /dev/null +++ b/diff-remap-data @@ -0,0 +1,2 @@ +#!/bin/sh +diff -rN -U 0 $1 $2|remap-log -O$1 -N$2 diff --git a/git-remap-data b/git-remap-data new file mode 100755 index 0000000..e7a887d --- /dev/null +++ b/git-remap-data @@ -0,0 +1,2 @@ +#!/bin/sh +GIT_DIFF_OPTS="-u 0" git diff -M "$@" diff --git a/remap-log.c b/remap-log.c new file mode 100644 index 0000000..e8dacf0 --- /dev/null +++ b/remap-log.c @@ -0,0 +1,499 @@ +/* + * Copyright (c) 2006, Al Viro. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +char *prefix1 = "a/", *prefix2 = "b/"; +char *from_prefix = "", *old_prefix = "O:", *new_prefix = "N:"; +size_t from_len = 0; + +char *line; +size_t size; + +void die(char *s) +{ + fprintf(stderr, "remap: %s\n", s); + exit(1); +} + +void Enomem(void) +{ + die("out of memory"); +} + +void Eio(void) +{ + die("IO error"); +} + +int getline(FILE *f) +{ + char *s; + if (!fgets(line, size, f)) { + if (!feof(f)) + Eio(); + return 0; + } + for (s = line + strlen(line); s[-1] != '\n'; s = s + strlen(s)) { + if (s == line + size - 1) { + line = realloc(line, 2 * size); + if (!line) + Enomem(); + s = line + size - 1; + size *= 2; + } + if (!fgets(s, size - (s - line), f)) { + if (!feof(f)) + Eio(); + return 1; + } + } + s[-1] = '\0'; + return 1; +} + +/* to == 0 -> deletion */ +struct range_map { + int from, to; +}; + +struct file_map { + char *name; + struct file_map *next; + char *new_name; + int count; + int allocated; + int last; + struct range_map ranges[]; +}; + +struct file_map *alloc_map(char *name) +{ + struct file_map *map; + + map = malloc(sizeof(struct file_map) + 16 * sizeof(struct range_map)); + if (!map) + Enomem(); + map->name = map->new_name = strdup(name); + if (!map->name) + Enomem(); + map->count = 0; + map->allocated = 16; + map->next = NULL; + map->last = 0; + return map; +} + +/* this is 32bit FNV1 */ +uint32_t FNV_hash(char *name) +{ + uint32_t n = 0x811c9dc5; + while (*name) { + unsigned char c = *name++; + n *= 0x01000193; + n ^= c; + } + return n; +} + +struct file_map *hash[1024]; + +int hash_map(struct file_map *map) +{ + int n = FNV_hash(map->name) % 1024; + struct file_map **p = &hash[n]; + + while (*p) { + if (!strcmp((*p)->name, map->name)) + return 0; + p = &(*p)->next; + } + *p = map; + if (map->new_name && !map->count) + return 0; + if (map->new_name && map->ranges[0].from != 1) + return 0; + return 1; +} + +struct file_map 
*find_map(char *name) +{ + static struct file_map *last = NULL; + int n = FNV_hash(name) % 1024; + struct file_map *p; + + if (last && !strcmp(last->name, name)) + return last; + + for (p = hash[n]; p && strcmp(p->name, name); p = p->next) + ; + if (p) + last = p; + return p; +} + +void parse_map(char *name) +{ + struct file_map *map = NULL; + struct range_map *range; + char *s; + FILE *f; + + f = fopen(name, "r"); + if (!f) + die("can't open map"); + while (getline(f)) { + if (line[0] == 'D') { + if (map && !hash_map(map)) + goto Ebadmap; + if (line[1] != ' ') + goto Ebadmap; + if (strchr(line + 2, ' ')) + goto Ebadmap; + map = alloc_map(line + 2); + map->new_name = NULL; + continue; + } + if (line[0] == 'M') { + if (map && !hash_map(map)) + goto Ebadmap; + if (line[1] != ' ') + goto Ebadmap; + s = strchr(line + 2, ' '); + if (!s) + goto Ebadmap; + *s++ = '\0'; + if (strchr(s, ' ')) + goto Ebadmap; + map = alloc_map(line + 2); + if (strcmp(line + 2, s)) { + map->new_name = strdup(s); + if (!map->new_name) + Enomem(); + } + continue; + } + if (!map || !map->new_name) + goto Ebadmap; + if (map->count == map->allocated) { + int n = 2 * map->allocated; + map = realloc(map, sizeof(struct file_map) + + n * sizeof(struct range_map)); + if (!map) + Enomem(); + map->allocated = n; + } + range = &map->ranges[map->count++]; + if (sscanf(line, "%d %d%*c", &range->from, &range->to) != 2) + goto Ebadmap; + if (range > map->ranges && range->from <= range[-1].from) + goto Ebadmap; + } + if (map && !hash_map(map)) + goto Ebadmap; + fclose(f); + return; +Ebadmap: + die("bad map"); +} + +struct range_map *find_range(struct file_map *map, int l) +{ + struct range_map *range = &map->ranges[map->last]; + struct range_map *p; + + if (range->from <= l) { + p = &map->ranges[map->count - 1]; + if (p->from > l) { + for (p = range; p->from <= l; p++) + ; + p--; + } + } else { + for (p = map->ranges; p->from <= l; p++) + ; + p--; + } + map->last = p - map->ranges; + return p; +} + +void 
mapline(void) +{ + struct file_map *map; + struct range_map *range; + unsigned long l; + char *s1, *s2; + char *name; + + if (strncmp(line, from_prefix, from_len)) + goto noise; + s1 = strchr(line + from_len, ':'); + if (!s1) + goto noise; + s2 = strchr(line + from_len, ' '); + if (s2 && s2 < s1) + goto noise; + l = strtoul(s1 + 1, &s2, 10); + if (s2 == s1 + 1 || *s2 != ':' || !l || l > INT_MAX) + goto noise; + *s1++ = *s2++ = '\0'; + name = line + from_len; + map = find_map(name); + if (!map) + goto new; + if (!map->new_name) + goto old; + name = map->new_name; + range = find_range(map, l); + if (!range->to) + goto old; + l += range->to - range->from; +new: + printf("%s%s:%lu:%s\n", new_prefix, name, l, s2); + return; +old: + s1[-1] = s2[-1] = ':'; + printf("%s%s\n", old_prefix, line + from_len); + return; +noise: + printf("%s\n", line); +} + +int parse_hunk(int *l1, int *l2, int *n1, int *n2) +{ + unsigned long n; + char *s, *p; + if (line[3] != '-') + return 0; + n = strtoul(line + 4, &s, 10); + if (s == line + 4 || n > INT_MAX) + return 0; + *l1 = n; + if (*s == ',') { + n = strtoul(s + 1, &p, 10); + if (p == s + 1 || n > INT_MAX) + return 0; + *n1 = n; + if (!n) + (*l1)++; + } else { + p = s; + *n1 = 1; + } + if (*p != ' ' || p[1] != '+') + return 0; + n = strtoul(p + 2, &s, 10); + if (s == p + 2 || n > INT_MAX) + return 0; + *l2 = n; + if (*s == ',') { + n = strtoul(s + 1, &p, 10); + if (p == s + 1 || n > INT_MAX) + return 0; + *n2 = n; + if (!n) + (*l2)++; + } else { + p = s; + *n2 = 1; + } + return 1; +} + +void parse_diff(void) +{ + int skipping = -1, suppress = 1; + char *name1 = NULL, *name2 = NULL; + int from = 1, to = 1; + int l1, l2, n1, n2; + enum cmd { + Diff, Hunk, New, Del, Copy, Rename, Junk + } cmd; + static struct { const char *s; size_t len; } pref[] = { + [Hunk] = {"@@ ", 3}, + [Diff] = {"diff ", 5}, + [New] = {"new file ", 9}, + [Del] = {"deleted file ", 12}, + [Copy] = {"copy from ", 10}, + [Rename] = {"rename from ", 11}, + [Junk] = {"", 
0}, + }; + size_t len1 = strlen(prefix1), len2 = strlen(prefix2); + + while (getline(stdin)) { + if (skipping > 0) { + switch (line[0]) { + case '+': + case '-': + case '\\': + continue; + } + } + for (cmd = 0; strncmp(line, pref[cmd].s, pref[cmd].len); cmd++) + ; + switch (cmd) { + case Hunk: + if (skipping < 0) + goto Ediff; + if (!suppress) { + if (!skipping) + printf("M %s %s\n", name1, name2); + if (!parse_hunk(&l1, &l2, &n1, &n2)) + goto Ediff; + if (l1 > from) + printf("%d %d\n", from, to); + if (n1) + printf("%d 0\n", l1); + from = l1 + n1; + to = l2 + n2; + } + skipping = 1; + break; + case Diff: + if (!suppress) { + if (!skipping) + printf("M %s %s\n", name1, name2); + printf("%d %d\n", from, to); + } + free(name1); + free(name2); + name2 = strrchr(line, ' '); + if (!name2) + goto Ediff; + *name2 = '\0'; + name1 = strrchr(line, ' '); + if (!name1) + goto Ediff; + if (strncmp(name1 + 1, prefix1, len1)) + goto Ediff; + if (strncmp(name2 + 1, prefix2, len2)) + goto Ediff; + name1 = strdup(name1 + len1 + 1); + name2 = strdup(name2 + len2 + 1); + if (!name1 || !name2) + goto Ediff; + skipping = 0; + suppress = 0; + from = to = 1; + break; + case New: + if (skipping) + goto Ediff; + suppress = 1; + break; + case Del: + case Copy: + if (skipping) + goto Ediff; + printf("D %s\n", name2); + suppress = 1; + break; + case Rename: + if (skipping) + goto Ediff; + printf("D %s\n", name2); + break; + default: + break; + } + } + return; +Ediff: + die("odd diff"); +} + +int main(int argc, char **argv) +{ + char *map_name = NULL; + char opt; + char *arg; + size_t len; + size = 256; + line = malloc(size); + if (!line) + Enomem(); + for (argc--, argv++; argc; argc--, argv++) { + if (argv[0][0] != '-') { + map_name = argv[0]; + continue; + } + opt = argv[0][1]; + if (!opt) + goto Eargs; + arg = argv[0] + 2; + if (!*arg) { + if (!--argc) + goto Eargs; + arg = *++argv; + } + len = strlen(arg); + switch (opt) { + case 'O': + prefix1 = malloc(len + 2); + if (!prefix1) + Enomem(); 
+ memcpy(prefix1, arg, len); + prefix1[len] = '/'; + prefix1[len + 1] = '\0'; + break; + case 'N': + prefix2 = malloc(len + 2); + if (!prefix2) + Enomem(); + memcpy(prefix2, arg, len); + prefix2[len] = '/'; + prefix2[len + 1] = '\0'; + break; + case 'p': + from_prefix = arg; + from_len = len; + break; + case 'o': + old_prefix = arg; + break; + case 'n': + new_prefix = arg; + break; + default: + Eargs: + die("bad arguments"); + } + } + + if (!map_name) { + parse_diff(); + } else { + parse_map(map_name); + while (getline(stdin)) + mapline(); + } + return 0; + +} diff --git a/what-it-does b/what-it-does new file mode 100644 index 0000000..f0030d5 --- /dev/null +++ b/what-it-does @@ -0,0 +1,52 @@ +Use: + diff-remap-data <dir1> <dir2> >map +or + git-remap-data <git-diff arguments> >map +will build information for remapper. + +remap-log is a filter. It takes map as argument and, in the simplest form, +will look at the lines in stdin that have form +<filename>:<number>:<text> +If the indicated line from old tree had survived into the new one, we will +get +N:<new-filename>:<new-number>:<text> +on the output. If it hadn't, we get +O:<filename>:<number>:<text> +Lines that do not have such form are passed unchanged. + +Even that is already very useful for log comparison. E.g. if old-log is +from the old tree and new-log is from the new one, we can do + remap-log map <old-log >foo + remap-log /dev/null <new-log >bar + diff -u foo bar +and have the noise due to line number changes excluded (empty map means +identity mapping, so the second line will simply slap N: on all lines of +form <filename>:<number>:<text> in new-log). + +Note that it's not just for build logs; the thing is useful for sparse logs, +grep -n output, etc., etc. 
+ +Behaviour described above is the default; what _really_ happens is +that we take lines of form +<original_prefix><filename>:<number>:<text> +and replace them with +<prefix_for_new><new-filename>:<new-number>:<text> +or +<prefix_for_old><filename>:<number>:<text> +Defaults are "", "N:" and "O:" resp.; what it gives us is the ability to +do multiple remappings. IOW, we can say + +diff-remap-data old-tree newer-tree > map1 +diff-remap-data newer-tree current-tree > map2 +remap-log -o old: map1 <old-log | remap-log -p N: -o newer: -n current: map2>foo + +and get lines that didn't make it into the newer tree marked with old: and +otherwise be unchanged, ones that made it to newer, but not the current to +be marked with newer: and have the filenames/line numbers remapped and ones +that made it all the way be marked with current: and remapped all the way +to current tree. + +That's quite useful when you want to carry logs for a while, basically using +them as annotated TODO ("logs" here can very well be results of grep -n with +annotations added to them). You can have all still relevant bits stay with +the locations in text and see what had fallen out. |