From 0df86b66899f9d6f1c09cceb4743c8cef733836a Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:11:37 +0000 Subject: fast-import: tighten path unquoting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Path parsing in fast-import is inconsistent and many unquoting errors are suppressed or not checked. appears in the grammar in these places: filemodify ::= 'M' SP ( | 'inline') SP LF filedelete ::= 'D' SP LF filecopy ::= 'C' SP SP LF filerename ::= 'R' SP SP LF ls ::= 'ls' SP SP LF ls-commit ::= 'ls' SP LF and fast-import.c parses them in five different ways: 1. For filemodify and filedelete: Try to unquote . If it unquotes without errors, use the unquoted version; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 2. For filecopy (source) and filerename (source): Try to unquote . If it unquotes without errors, use the unquoted version; otherwise, treat it as literal bytes up to, but not including, the next SP. 3. For filecopy (dest) and filerename (dest): Like 1., but an unquoted empty string is forbidden. 4. For ls: If starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 5. For ls-commit: Unquote and report parse errors. (It must start with `"` to disambiguate from ls.) In the first three, any errors from trying to unquote a string are suppressed, so a quoted string that contains invalid escapes would be interpreted as literal bytes. For example, `"\xff"` would fail to unquote (because hex escapes are not supported), and it would instead be interpreted as the byte sequence '"', '\\', 'x', 'f', 'f', '"', which is certainly not intended. Some front-ends erroneously use their language's standard quoting routine instead of matching Git's, which could silently introduce escapes that would be incorrectly parsed due to this and lead to data corruption. The documentation states “To use a source path that contains SP the path must be quoted.”, so it is expected that some implementations depend on spaces being allowed in paths in the final position. Thus we have two documented ways to parse paths, so simplify the implementation to that. Now we have: 1. `parse_path_eol` for filemodify, filedelete, filecopy (dest), filerename (dest), ls, and ls-commit: If starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes to the end of the line (including any number of SP). 2. `parse_path_space` for filecopy (source) and filerename (source): If starts with `"`, unquote it and report parse errors; otherwise, treat it as literal bytes up to, but not including, the next SP. It must be followed by SP. There remain two special cases: The dest in filecopy and rename cannot be an unquoted empty string (this will be addressed subsequently) and in ls-commit must be quoted to disambiguate it from ls. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 108 ++++++++++++--------- t/t9300-fast-import.sh | 258 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 322 insertions(+), 44 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 782bda007c..8eba89689b 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2258,10 +2258,60 @@ static uintmax_t parse_mark_ref_space(const char **p) return mark; } +/* + * Parse the path string into the strbuf. The path can either be quoted with + * escape sequences or unquoted without escape sequences. Unquoted strings may + * contain spaces only if `is_last_field` is nonzero; otherwise, it stops + * parsing at the first space. + */ +static void parse_path(struct strbuf *sb, const char *p, const char **endp, + int is_last_field, const char *field) +{ + if (*p == '"') { + if (unquote_c_style(sb, p, endp)) + die("Invalid %s: %s", field, command_buf.buf); + } else { + /* + * Unless we are parsing the last field of a line, + * SP is the end of this field. + */ + *endp = is_last_field + ? p + strlen(p) + : strchrnul(p, ' '); + strbuf_add(sb, p, *endp - p); + } +} + +/* + * Parse the path string into the strbuf, and complain if this is not the end of + * the string. Unquoted strings may contain spaces. + */ +static void parse_path_eol(struct strbuf *sb, const char *p, const char *field) +{ + const char *end; + + parse_path(sb, p, &end, 1, field); + if (*end) + die("Garbage after %s: %s", field, command_buf.buf); +} + +/* + * Parse the path string into the strbuf, and ensure it is followed by a space. + * Unquoted strings may not contain spaces. Update *endp to point to the first + * character after the space. + */ +static void parse_path_space(struct strbuf *sb, const char *p, + const char **endp, const char *field) +{ + parse_path(sb, p, endp, 0, field); + if (**endp != ' ') + die("Missing space after %s: %s", field, command_buf.buf); + (*endp)++; +} + static void file_change_m(const char *p, struct branch *b) { static struct strbuf uq = STRBUF_INIT; - const char *endp; struct object_entry *oe; struct object_id oid; uint16_t mode, inline_data = 0; @@ -2299,11 +2349,8 @@ static void file_change_m(const char *p, struct branch *b) } strbuf_reset(&uq); - if (!unquote_c_style(&uq, p, &endp)) { - if (*endp) - die("Garbage after path in: %s", command_buf.buf); - p = uq.buf; - } + parse_path_eol(&uq, p, "path"); + p = uq.buf; /* Git does not track empty, non-toplevel directories. */ if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *p) { @@ -2367,48 +2414,29 @@ static void file_change_m(const char *p, struct branch *b) static void file_change_d(const char *p, struct branch *b) { static struct strbuf uq = STRBUF_INIT; - const char *endp; strbuf_reset(&uq); - if (!unquote_c_style(&uq, p, &endp)) { - if (*endp) - die("Garbage after path in: %s", command_buf.buf); - p = uq.buf; - } + parse_path_eol(&uq, p, "path"); + p = uq.buf; tree_content_remove(&b->branch_tree, p, NULL, 1); } -static void file_change_cr(const char *s, struct branch *b, int rename) +static void file_change_cr(const char *p, struct branch *b, int rename) { - const char *d; + const char *s, *d; static struct strbuf s_uq = STRBUF_INIT; static struct strbuf d_uq = STRBUF_INIT; - const char *endp; struct tree_entry leaf; strbuf_reset(&s_uq); - if (!unquote_c_style(&s_uq, s, &endp)) { - if (*endp != ' ') - die("Missing space after source: %s", command_buf.buf); - } else { - endp = strchr(s, ' '); - if (!endp) - die("Missing space after source: %s", command_buf.buf); - strbuf_add(&s_uq, s, endp - s); - } + parse_path_space(&s_uq, p, &p, "source"); s = s_uq.buf; - endp++; - if (!*endp) + if (!*p) die("Missing dest: %s", command_buf.buf); - - d = endp; strbuf_reset(&d_uq); - if (!unquote_c_style(&d_uq, d, &endp)) { - if (*endp) - die("Garbage after dest in: %s", command_buf.buf); - d = d_uq.buf; - } + parse_path_eol(&d_uq, p, "dest"); + d = d_uq.buf; memset(&leaf, 0, sizeof(leaf)); if (rename) @@ -3152,6 +3180,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path) static void parse_ls(const char *p, struct branch *b) { + static struct strbuf uq = STRBUF_INIT; struct tree_entry *root = NULL; struct tree_entry leaf = {NULL}; @@ -3168,16 +3197,9 @@ static void parse_ls(const char *p, struct branch *b) root->versions[1].mode = S_IFDIR; load_tree(root); } - if (*p == '"') { - static struct strbuf uq = STRBUF_INIT; - const char *endp; - strbuf_reset(&uq); - if (unquote_c_style(&uq, p, &endp)) - die("Invalid path: %s", command_buf.buf); - if (*endp) - die("Garbage after path in: %s", command_buf.buf); - p = uq.buf; - } + strbuf_reset(&uq); + parse_path_eol(&uq, p, "path"); + p = uq.buf; tree_content_get(root, p, &leaf, 1); /* * A directory in preparation would have a sha1 of zero diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 60e30fed3c..de2f1304e8 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -2142,6 +2142,7 @@ test_expect_success 'Q: deny note on empty branch' ' EOF test_must_fail git fast-import has a special +# case. Test every occurrence of in the grammar against every error case. +# + +# +# Valid paths at the end of a line: filemodify, filedelete, filecopy (dest), +# filerename (dest), and ls. +# +# commit :301 from root -- modify hello.c (for setup) +# commit :302 from :301 -- modify $path +# commit :303 from :302 -- delete $path +# commit :304 from :301 -- copy hello.c $path +# commit :305 from :301 -- rename hello.c $path +# ls :305 $path +# +test_path_eol_success () { + local test="$1" path="$2" unquoted_path="$3" + test_expect_success "S: paths at EOL with $test must work" ' + test_when_finished "git branch -D S-path-eol" && + + git fast-import --export-marks=marks.out <<-EOF >out 2>err && + blob + mark :401 + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data <tree_m.exp && + git ls-tree $commit_m | sort >tree_m.out && + test_cmp tree_m.exp tree_m.out && + + printf "100644 blob $blob1\thello.c\n" >tree_d.exp && + git ls-tree $commit_d >tree_d.out && + test_cmp tree_d.exp tree_d.out && + + ( + printf "100644 blob $blob1\t$unquoted_path\n" && + printf "100644 blob $blob1\thello.c\n" + ) | sort >tree_c.exp && + git ls-tree $commit_c | sort >tree_c.out && + test_cmp tree_c.exp tree_c.out && + + printf "100644 blob $blob1\t$unquoted_path\n" >tree_r.exp && + git ls-tree $commit_r >tree_r.out && + test_cmp tree_r.exp tree_r.out && + + test_cmp out tree_r.exp + ' +} + +test_path_eol_success 'quoted spaces' '" hello world.c "' ' hello world.c ' +test_path_eol_success 'unquoted spaces' ' hello world.c ' ' hello world.c ' + +# +# Valid paths before a space: filecopy (source) and filerename (source). +# +# commit :301 from root -- modify $path (for setup) +# commit :302 from :301 -- copy $path hello2.c +# commit :303 from :301 -- rename $path hello2.c +# +test_path_space_success () { + local test="$1" path="$2" unquoted_path="$3" + test_expect_success "S: paths before space with $test must work" ' + test_when_finished "git branch -D S-path-space" && + + git fast-import --export-marks=marks.out <<-EOF 2>err && + blob + mark :401 + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data <tree_c.exp && + git ls-tree $commit_c | sort >tree_c.out && + test_cmp tree_c.exp tree_c.out && + + printf "100644 blob $blob\thello2.c\n" >tree_r.exp && + git ls-tree $commit_r >tree_r.out && + test_cmp tree_r.exp tree_r.out + ' +} + +test_path_space_success 'quoted spaces' '" hello world.c "' ' hello world.c ' +test_path_space_success 'no unquoted spaces' 'hello_world.c' 'hello_world.c' + +# +# Test a single commit change with an invalid path. Run it with all occurrences +# of in the grammar against all error kinds. +# +test_path_fail () { + local change="$1" what="$2" prefix="$3" path="$4" suffix="$5" err_grep="$6" + test_expect_success "S: $change with $what must fail" ' + test_must_fail git fast-import <<-EOF 2>err && + blob + mark :1 + data < $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE + data <, the must be quoted. +test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path + ### ### series T (ls) ### -- cgit 1.2.3-korg From 5733f894d7e66d0f0fbd29675d8420825ce0c29f Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:11:44 +0000 Subject: fast-import: directly use strbufs for paths Previously, one case would not write the path to the strbuf: when the path is unquoted and at the end of the string. It was essentially copy-on-write. However, with the logic simplification of the previous commit, this case was eliminated and the strbuf is always populated. Directly use the strbufs now instead of an alias. Since this already changes all the lines that use the strbufs, rename them from `uq` to be more descriptive. That they are unquoted is not their most important property, so name them after what they carry. Additionally, `file_change_m` no longer needs to copy the path before reading inline data. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 64 ++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 8eba89689b..765429a66c 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2311,7 +2311,7 @@ static void parse_path_space(struct strbuf *sb, const char *p, static void file_change_m(const char *p, struct branch *b) { - static struct strbuf uq = STRBUF_INIT; + static struct strbuf path = STRBUF_INIT; struct object_entry *oe; struct object_id oid; uint16_t mode, inline_data = 0; @@ -2348,13 +2348,12 @@ static void file_change_m(const char *p, struct branch *b) die("Missing space after SHA1: %s", command_buf.buf); } - strbuf_reset(&uq); - parse_path_eol(&uq, p, "path"); - p = uq.buf; + strbuf_reset(&path); + parse_path_eol(&path, p, "path"); /* Git does not track empty, non-toplevel directories. */ - if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *p) { - tree_content_remove(&b->branch_tree, p, NULL, 0); + if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) { + tree_content_remove(&b->branch_tree, path.buf, NULL, 0); return; } @@ -2375,10 +2374,6 @@ static void file_change_m(const char *p, struct branch *b) if (S_ISDIR(mode)) die("Directories cannot be specified 'inline': %s", command_buf.buf); - if (p != uq.buf) { - strbuf_addstr(&uq, p); - p = uq.buf; - } while (read_next_command() != EOF) { const char *v; if (skip_prefix(command_buf.buf, "cat-blob ", &v)) @@ -2404,55 +2399,51 @@ static void file_change_m(const char *p, struct branch *b) command_buf.buf); } - if (!*p) { + if (!*path.buf) { tree_content_replace(&b->branch_tree, &oid, mode, NULL); return; } - tree_content_set(&b->branch_tree, p, &oid, mode, NULL); + tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL); } static void file_change_d(const char *p, struct branch *b) { - static struct strbuf uq = STRBUF_INIT; + static struct strbuf path = STRBUF_INIT; - strbuf_reset(&uq); - parse_path_eol(&uq, p, "path"); - p = uq.buf; - tree_content_remove(&b->branch_tree, p, NULL, 1); + strbuf_reset(&path); + parse_path_eol(&path, p, "path"); + tree_content_remove(&b->branch_tree, path.buf, NULL, 1); } static void file_change_cr(const char *p, struct branch *b, int rename) { - const char *s, *d; - static struct strbuf s_uq = STRBUF_INIT; - static struct strbuf d_uq = STRBUF_INIT; + static struct strbuf source = STRBUF_INIT; + static struct strbuf dest = STRBUF_INIT; struct tree_entry leaf; - strbuf_reset(&s_uq); - parse_path_space(&s_uq, p, &p, "source"); - s = s_uq.buf; + strbuf_reset(&source); + parse_path_space(&source, p, &p, "source"); if (!*p) die("Missing dest: %s", command_buf.buf); - strbuf_reset(&d_uq); - parse_path_eol(&d_uq, p, "dest"); - d = d_uq.buf; + strbuf_reset(&dest); + parse_path_eol(&dest, p, "dest"); memset(&leaf, 0, sizeof(leaf)); if (rename) - tree_content_remove(&b->branch_tree, s, &leaf, 1); + tree_content_remove(&b->branch_tree, source.buf, &leaf, 1); else - tree_content_get(&b->branch_tree, s, &leaf, 1); + tree_content_get(&b->branch_tree, source.buf, &leaf, 1); if (!leaf.versions[1].mode) - die("Path %s not in branch", s); - if (!*d) { /* C "path/to/subdir" "" */ + die("Path %s not in branch", source.buf); + if (!*dest.buf) { /* C "path/to/subdir" "" */ tree_content_replace(&b->branch_tree, &leaf.versions[1].oid, leaf.versions[1].mode, leaf.tree); return; } - tree_content_set(&b->branch_tree, d, + tree_content_set(&b->branch_tree, dest.buf, &leaf.versions[1].oid, leaf.versions[1].mode, leaf.tree); @@ -3180,7 +3171,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path) static void parse_ls(const char *p, struct branch *b) { - static struct strbuf uq = STRBUF_INIT; + static struct strbuf path = STRBUF_INIT; struct tree_entry *root = NULL; struct tree_entry leaf = {NULL}; @@ -3197,10 +3188,9 @@ static void parse_ls(const char *p, struct branch *b) root->versions[1].mode = S_IFDIR; load_tree(root); } - strbuf_reset(&uq); - parse_path_eol(&uq, p, "path"); - p = uq.buf; - tree_content_get(root, p, &leaf, 1); + strbuf_reset(&path); + parse_path_eol(&path, p, "path"); + tree_content_get(root, path.buf, &leaf, 1); /* * A directory in preparation would have a sha1 of zero * until it is saved. Save, for simplicity. @@ -3208,7 +3198,7 @@ static void parse_ls(const char *p, struct branch *b) if (S_ISDIR(leaf.versions[1].mode)) store_tree(&leaf); - print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, p); + print_ls(leaf.versions[1].mode, leaf.versions[1].oid.hash, path.buf); if (leaf.tree) release_tree_content_recursive(leaf.tree); if (!b || root != &b->branch_tree) -- cgit 1.2.3-korg From b5062f752ef039b6fa8b3a7491072c2f1dfe3cf2 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:11:52 +0000 Subject: fast-import: allow unquoted empty path for root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since filerename was added in f39a946a1f (Support wholesale directory renames in fast-import, 2007-07-09) and filecopy in b6f3481bb4 (Teach fast-import to recursively copy files/directories, 2007-07-15), both have produced an error when the destination path is empty. Later, when support for targeting the root directory with an empty string was added in 2794ad5244 (fast-import: Allow filemodify to set the root, 2010-10-10), this had the effect of allowing the quoted empty string (`""`), but forbidding its unquoted variant (``). This seems to have been intended as simple data validation for parsing two paths, rather than a syntax restriction, because it was not extended to the other operations. All other occurrences of paths (in filemodify, filedelete, the source of filecopy and filerename, and ls) allow both. For most of this feature's lifetime, the documentation has not prescribed the use of quoted empty strings. In e5959106d6 (Documentation/fast-import: put explanation of M 040000 "" in context, 2011-01-15), its documentation was changed from “`` may also be an empty string (`""`) to specify the root of the tree” to “The root of the tree can be represented by an empty string as ``”. Thus, we should assume that some front-ends have depended on this behavior. Remove this restriction for the destination paths of filecopy and filerename and change tests targeting the root to test `""` and ``. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 3 - t/t9300-fast-import.sh | 363 ++++++++++++++++++++++++++----------------------- 2 files changed, 190 insertions(+), 176 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 765429a66c..c8a1e3ef3d 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2423,9 +2423,6 @@ static void file_change_cr(const char *p, struct branch *b, int rename) strbuf_reset(&source); parse_path_space(&source, p, &p, "source"); - - if (!*p) - die("Missing dest: %s", command_buf.buf); strbuf_reset(&dest); parse_path_eol(&dest, p, "dest"); diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index de2f1304e8..13f98e6688 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1059,30 +1059,33 @@ test_expect_success 'M: rename subdirectory to new subdirectory' ' compare_diff_raw expect actual ' -test_expect_success 'M: rename root to subdirectory' ' - cat >input <<-INPUT_END && - commit refs/heads/M4 - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <input <<-INPUT_END && + commit refs/heads/M4 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <expect <<-EOF && - :100644 100644 $oldf $oldf R100 file2/oldf sub/file2/oldf - :100755 100755 $f4id $f4id R100 file4 sub/file4 - :100755 100755 $newf $newf R100 i/am/new/to/you sub/i/am/new/to/you - :100755 100755 $f6id $f6id R100 newdir/exec.sh sub/newdir/exec.sh - :100644 100644 $f5id $f5id R100 newdir/interesting sub/newdir/interesting - EOF - git fast-import actual && - compare_diff_raw expect actual -' + cat >expect <<-EOF && + :100644 100644 $oldf $oldf R100 file2/oldf sub/file2/oldf + :100755 100755 $f4id $f4id R100 file4 sub/file4 + :100755 100755 $newf $newf R100 i/am/new/to/you sub/i/am/new/to/you + :100755 100755 $f6id $f6id R100 newdir/exec.sh sub/newdir/exec.sh + :100644 100644 $f5id $f5id R100 newdir/interesting sub/newdir/interesting + EOF + git fast-import actual && + compare_diff_raw expect actual + ' +done ### ### series N @@ -1259,49 +1262,52 @@ test_expect_success PIPE 'N: empty directory reads as missing' ' test_cmp expect actual ' -test_expect_success 'N: copy root directory by tree hash' ' - cat >expect <<-EOF && - :100755 000000 $newf $zero D file3/newf - :100644 000000 $oldf $zero D file3/oldf - EOF - root=$(git rev-parse refs/heads/branch^0^{tree}) && - cat >input <<-INPUT_END && - commit refs/heads/N6 - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <expect <<-EOF && + :100755 000000 $newf $zero D file3/newf + :100644 000000 $oldf $zero D file3/oldf + EOF + root_tree=$(git rev-parse refs/heads/branch^0^{tree}) && + cat >input <<-INPUT_END && + commit refs/heads/N6 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <actual && - compare_diff_raw expect actual -' + from refs/heads/branch^0 + M 040000 $root_tree $root + INPUT_END + git fast-import actual && + compare_diff_raw expect actual + ' -test_expect_success 'N: copy root by path' ' - cat >expect <<-EOF && - :100755 100755 $newf $newf C100 file2/newf oldroot/file2/newf - :100644 100644 $oldf $oldf C100 file2/oldf oldroot/file2/oldf - :100755 100755 $f4id $f4id C100 file4 oldroot/file4 - :100755 100755 $f6id $f6id C100 newdir/exec.sh oldroot/newdir/exec.sh - :100644 100644 $f5id $f5id C100 newdir/interesting oldroot/newdir/interesting - EOF - cat >input <<-INPUT_END && - commit refs/heads/N-copy-root-path - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <expect <<-EOF && + :100755 100755 $newf $newf C100 file2/newf oldroot/file2/newf + :100644 100644 $oldf $oldf C100 file2/oldf oldroot/file2/oldf + :100755 100755 $f4id $f4id C100 file4 oldroot/file4 + :100755 100755 $f6id $f6id C100 newdir/exec.sh oldroot/newdir/exec.sh + :100644 100644 $f5id $f5id C100 newdir/interesting oldroot/newdir/interesting + EOF + cat >input <<-INPUT_END && + commit refs/heads/N-copy-root-path + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <actual && - compare_diff_raw expect actual -' + from refs/heads/branch^0 + C $root oldroot + INPUT_END + git fast-import actual && + compare_diff_raw expect actual + ' +done test_expect_success 'N: delete directory by copying' ' cat >expect <<-\EOF && @@ -1431,98 +1437,102 @@ test_expect_success 'N: reject foo/ syntax in ls argument' ' INPUT_END ' -test_expect_success 'N: copy to root by id and modify' ' - echo "hello, world" >expect.foo && - echo hello >expect.bar && - git fast-import <<-SETUP_END && - commit refs/heads/N7 - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <expect.foo && + echo hello >expect.bar && + git fast-import <<-SETUP_END && + commit refs/heads/N7 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE - data < $GIT_COMMITTER_DATE + data <actual.foo && - git show N8:foo/bar >actual.bar && - test_cmp expect.foo actual.foo && - test_cmp expect.bar actual.bar -' + M 040000 $tree $root + M 644 inline foo/foo + data <actual.foo && + git show N8:foo/bar >actual.bar && + test_cmp expect.foo actual.foo && + test_cmp expect.bar actual.bar + ' -test_expect_success 'N: extract subtree' ' - branch=$(git rev-parse --verify refs/heads/branch^{tree}) && - cat >input <<-INPUT_END && - commit refs/heads/N9 - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <input <<-INPUT_END && + commit refs/heads/N9 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <expect.baz && - echo hello, world >expect.qux && - git fast-import <<-SETUP_END && - commit refs/heads/N10 - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <expect.baz && + echo hello, world >expect.qux && + git fast-import <<-SETUP_END && + commit refs/heads/N10 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data < $GIT_COMMITTER_DATE - data < $GIT_COMMITTER_DATE + data <actual.baz && - git show N11:bar/qux >actual.qux && - git show N11:bar/quux >actual.quux && - test_cmp expect.baz actual.baz && - test_cmp expect.qux actual.qux && - test_cmp expect.qux actual.quux' + M 040000 $tree $root + M 100644 inline foo/bar/qux + data <actual.baz && + git show N11:bar/qux >actual.qux && + git show N11:bar/quux >actual.quux && + test_cmp expect.baz actual.baz && + test_cmp expect.qux actual.qux && + test_cmp expect.qux actual.quux + ' +done ### ### series O @@ -3067,6 +3077,7 @@ test_expect_success 'S: ls with garbage after sha1 must fail' ' # There are two sorts of ways a path can be parsed, depending on whether it is # the last field on the line. Additionally, ls without a has a special # case. Test every occurrence of in the grammar against every error case. +# Paths for the root (empty strings) are tested elsewhere. # # @@ -3321,16 +3332,19 @@ test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path ### # Setup is carried over from series S. -test_expect_success 'T: ls root tree' ' - sed -e "s/Z\$//" >expect <<-EOF && - 040000 tree $(git rev-parse S^{tree}) Z - EOF - sha1=$(git rev-parse --verify S) && - git fast-import --import-marks=marks <<-EOF >actual && - ls $sha1 "" - EOF - test_cmp expect actual -' +for root in '""' '' +do + test_expect_success "T: ls root ($root) tree" ' + sed -e "s/Z\$//" >expect <<-EOF && + 040000 tree $(git rev-parse S^{tree}) Z + EOF + sha1=$(git rev-parse --verify S) && + git fast-import --import-marks=marks <<-EOF >actual && + ls $sha1 $root + EOF + test_cmp expect actual + ' +done test_expect_success 'T: delete branch' ' git branch to-delete && @@ -3432,30 +3446,33 @@ test_expect_success 'U: validate directory delete result' ' compare_diff_raw expect actual ' -test_expect_success 'U: filedelete root succeeds' ' - cat >input <<-INPUT_END && - commit refs/heads/U - committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE - data <input <<-INPUT_END && + commit refs/heads/U-delete-root + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <expect <<-EOF && - :100644 000000 $f7id $ZERO_OID D hello.c - EOF + test_expect_success "U: validate root ($root) delete result" ' + cat >expect <<-EOF && + :100644 000000 $f7id $ZERO_OID D hello.c + EOF - git diff-tree -M -r U^1 U >actual && + git diff-tree -M -r U U-delete-root >actual && - compare_diff_raw expect actual -' + compare_diff_raw expect actual + ' +done ### ### series V (checkpoint) -- cgit 1.2.3-korg From 212ab23e98546b24a183dce86b90595e1e4b92dd Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:11:59 +0000 Subject: fast-import: remove dead strbuf The strbuf in `note_change_n` is to copy the remainder of `p` before potentially invalidating it when reading the next line. However, `p` is not used after that point. It has been unused since the function was created in a8dd2e7d2b (fast-import: Add support for importing commit notes, 2009-10-09) and looks to be a fossil from adapting `file_change_m`. Remove it. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index c8a1e3ef3d..832d0055f9 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2448,7 +2448,6 @@ static void file_change_cr(const char *p, struct branch *b, int rename) static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout) { - static struct strbuf uq = STRBUF_INIT; struct object_entry *oe; struct branch *s; struct object_id oid, commit_oid; @@ -2513,10 +2512,6 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa die("Invalid ref name or SHA1 expression: %s", p); if (inline_data) { - if (p != uq.buf) { - strbuf_addstr(&uq, p); - p = uq.buf; - } read_next_command(); parse_and_store_blob(&last_blob, &oid, 0); } else if (oe) { -- cgit 1.2.3-korg From 22915955caaff8b49671426aba31cbcee19ed0ac Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:12:04 +0000 Subject: fast-import: improve documentation for path quoting It describes what characters cannot be in an unquoted path, but not their semantics. Reframe it as a definition of unquoted paths. From the perspective of the parser, whether it starts with `"` is what defines whether it will parse it as quoted or unquoted. The restrictions on characters in unquoted paths (with starting-", LF, and spaces) are explained in the quoted paragraph. Move it to the unquoted paragraph and reword. The restriction that the source paths of filecopy and filerename cannot contain SP is only stated in their respective sections. Restate it in the section. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index b2607366b9..1882758b8a 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -630,18 +630,24 @@ in octal. Git only supports the following modes: In both formats `` is the complete path of the file to be added (if not already existing) or modified (if already existing). -A `` string must use UNIX-style directory separators (forward -slash `/`), may contain any byte other than `LF`, and must not -start with double quote (`"`). - -A path can use C-style string quoting; this is accepted in all cases -and mandatory if the filename starts with double quote or contains -`LF`. In C-style quoting, the complete name should be surrounded with -double quotes, and any `LF`, backslash, or double quote characters -must be escaped by preceding them with a backslash (e.g., -`"path/with\n, \\ and \" in it"`). - -The value of `` must be in canonical form. That is it must not: +A `` can be written as unquoted bytes or a C-style quoted string. + +When a `` does not start with a double quote (`"`), it is an +unquoted string and is parsed as literal bytes without any escape +sequences. However, if the filename contains `LF` or starts with double +quote, it cannot be represented as an unquoted string and must be +quoted. Additionally, the source `` in `filecopy` or `filerename` +must be quoted if it contains SP. + +When a `` starts with a double quote (`"`), it is a C-style quoted +string, where the complete filename is enclosed in a pair of double +quotes and escape sequences are used. Certain characters must be escaped +by preceding them with a backslash: `LF` is written as `\n`, backslash +as `\\`, and double quote as `\"`. All filenames can be represented as +quoted strings. + +A `` must use UNIX-style directory separators (forward slash `/`) +and its value must be in canonical form. That is it must not: * contain an empty directory component (e.g. `foo//bar` is invalid), * end with a directory separator (e.g. `foo/` is invalid), -- cgit 1.2.3-korg From a923a04b80885368acacaf280eb0db16270e5a5b Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:12:12 +0000 Subject: fast-import: document C-style escapes for paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simply saying “C-style” string quoting is imprecise, as only a subset of C escapes are supported. Document the exact escapes. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 6 +++++- t/t9300-fast-import.sh | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 1882758b8a..c6082c3b49 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -643,7 +643,11 @@ When a `` starts with a double quote (`"`), it is a C-style quoted string, where the complete filename is enclosed in a pair of double quotes and escape sequences are used. Certain characters must be escaped by preceding them with a backslash: `LF` is written as `\n`, backslash -as `\\`, and double quote as `\"`. All filenames can be represented as +as `\\`, and double quote as `\"`. Some characters may optionally be +written with escape sequences: `\a` for bell, `\b` for backspace, `\f` +for form feed, `\n` for line feed, `\r` for carriage return, `\t` for +horizontal tab, and `\v` for vertical tab. Any byte can be written with +3-digit octal codes (e.g., `\033`). All filenames can be represented as quoted strings. A `` must use UNIX-style directory separators (forward slash `/`) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 13f98e6688..5cde8f8d01 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3189,8 +3189,9 @@ test_path_eol_success () { ' } -test_path_eol_success 'quoted spaces' '" hello world.c "' ' hello world.c ' -test_path_eol_success 'unquoted spaces' ' hello world.c ' ' hello world.c ' +test_path_eol_success 'quoted spaces' '" hello world.c "' ' hello world.c ' +test_path_eol_success 'unquoted spaces' ' hello world.c ' ' hello world.c ' +test_path_eol_success 'octal escapes' '"\150\151\056\143"' 'hi.c' # # Valid paths before a space: filecopy (source) and filerename (source). @@ -3256,8 +3257,9 @@ test_path_space_success () { ' } -test_path_space_success 'quoted spaces' '" hello world.c "' ' hello world.c ' -test_path_space_success 'no unquoted spaces' 'hello_world.c' 'hello_world.c' +test_path_space_success 'quoted spaces' '" hello world.c "' ' hello world.c ' +test_path_space_success 'no unquoted spaces' 'hello_world.c' 'hello_world.c' +test_path_space_success 'octal escapes' '"\150\151\056\143"' 'hi.c' # # Test a single commit change with an invalid path. Run it with all occurrences -- cgit 1.2.3-korg From be4d6a371e80e16ae02d1f258103493394e4c155 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:12:19 +0000 Subject: fast-import: forbid escaped NUL in paths NUL cannot appear in paths. Even disregarding filesystem path limitations, the tree object format delimits with NUL, so such a path cannot be encoded by Git. When a quoted path is unquoted, it could possibly contain NUL from "\000". Forbid it so it isn't truncated. fast-import still has other issues with NUL, but those will be addressed later. Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 1 + builtin/fast-import.c | 2 ++ t/t9300-fast-import.sh | 1 + 3 files changed, 4 insertions(+) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index c6082c3b49..8b6dde45f1 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -661,6 +661,7 @@ and its value must be in canonical form. That is it must not: The root of the tree can be represented by an empty string as ``. +`` cannot contain NUL, either literally or escaped as `\000`. It is recommended that `` always be encoded using UTF-8. `filedelete` diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 832d0055f9..419ffdcdb5 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2270,6 +2270,8 @@ static void parse_path(struct strbuf *sb, const char *p, const char **endp, if (*p == '"') { if (unquote_c_style(sb, p, endp)) die("Invalid %s: %s", field, command_buf.buf); + if (strlen(sb->buf) != sb->len) + die("NUL in %s: %s", field, command_buf.buf); } else { /* * Unless we are parsing the last field of a line, diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 5cde8f8d01..1e68426852 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3300,6 +3300,7 @@ test_path_base_fail () { local change="$1" prefix="$2" field="$3" suffix="$4" test_path_fail "$change" 'unclosed " in '"$field" "$prefix" '"hello.c' "$suffix" "Invalid $field" test_path_fail "$change" "invalid escape in quoted $field" "$prefix" '"hello\xff"' "$suffix" "Invalid $field" + test_path_fail "$change" "escaped NUL in quoted $field" "$prefix" '"hello\000"' "$suffix" "NUL in $field" } test_path_eol_quoted_fail () { local change="$1" prefix="$2" field="$3" -- cgit 1.2.3-korg From ab4ad1fa8af719443a8b53fa31fcf84392e21be6 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 14 Apr 2024 01:12:27 +0000 Subject: fast-import: make comments more precise The former is somewhat imprecise. The latter became out of sync with the behavior in e814c39c2f (fast-import: refactor parsing of spaces, 2014-06-18). Signed-off-by: Thalia Archibald Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 419ffdcdb5..dc5a9d32dd 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2210,7 +2210,7 @@ static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const ch * * idnum ::= ':' bigint; * - * Return the first character after the value in *endptr. + * Update *endptr to point to the first character after the value. * * Complain if the following character is not what is expected, * either a space or end of the string. @@ -2243,8 +2243,8 @@ static uintmax_t parse_mark_ref_eol(const char *p) } /* - * Parse the mark reference, demanding a trailing space. Return a - * pointer to the space. + * Parse the mark reference, demanding a trailing space. Update *p to + * point to the first character after the space. */ static uintmax_t parse_mark_ref_space(const char **p) { -- cgit 1.2.3-korg