aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2012-02-15 12:24:56 +0000
committer: Catalin Marinas <catalin.marinas@gmail.com> 2012-02-15 12:24:56 +0000
commit: 0d85bab3926f6c852d9caca0dfc8ca71d237df71 (patch)
tree: b781263210933408a481ef63010d8482d3f5847b
parent: 60c49d0be075292ffadffd21069f23d01a7c5ac1 (diff)
download: stgit-0d85bab3926f6c852d9caca0dfc8ca71d237df71.tar.gz
Parse commit object header correctly
To allow parsing the header produced by versions of Git newer than the code written to parse it, all commit parsers are expected to skip unknown header lines, so that newer types of header lines can be added safely. The only three things that are promised are: (1) the header ends with an empty line (just an LF, not "a blank line"), (2) unknown lines can be skipped, and (3) a header "field" begins with the field name, followed by a single SP, followed by the value.

The parser used by StGit, introduced by commit cbe4567 (New StGit core infrastructure: repository operations, 2007-12-19), was accidentally a bit too loose, so that it lost information, and a bit too strict, so that it raised an exception when dealing with a line it does not understand.

- It used "strip()" to remove whitespace from both ends, risking a line containing only whitespace being mistaken for the end of the header.

- It used "k, v = line.split(None, 1)", blindly assuming that all header lines (including the ones that the version of StGit may not understand) can safely be split without raising an exception, which is not true if there is no SP on the line.

This patch changes the parsing logic so that it:

(1) detects the end of the header correctly by treating only an empty line as such;

(2) handles multi-line fields (a header line that begins with a single SP is appended to the previous line after removing that leading SP but retaining the LF between the line and the previous line) correctly;

(3) splits a line at the first SP to find the field name, but only does so when there actually is an SP on the line; and

(4) ignores lines that cannot be understood, without barfing.

Updated following comments from Michael Haggerty.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Catalin Marinas <catalin.marinas@gmail.com>
-rw-r--r-- stgit/lib/git.py 38
1 files changed, 24 insertions, 14 deletions
diff --git a/stgit/lib/git.py b/stgit/lib/git.py
index 56287f6..e7f095e 100644
--- a/stgit/lib/git.py
+++ b/stgit/lib/git.py
@@ -390,21 +390,31 @@ class CommitData(Immutable, Repr):
@return: A new L{CommitData} object
@rtype: L{CommitData}"""
cd = cls(parents = [])
- lines = list(s.splitlines(True))
- for i in xrange(len(lines)):
- line = lines[i].strip()
+ lines = []
+ raw_lines = s.split('\n')
+ # Collapse multi-line header lines
+ for i, line in enumerate(raw_lines):
if not line:
- return cd.set_message(''.join(lines[i+1:]))
- key, value = line.split(None, 1)
- if key == 'tree':
- cd = cd.set_tree(repository.get_tree(value))
- elif key == 'parent':
- cd = cd.add_parent(repository.get_commit(value))
- elif key == 'author':
- cd = cd.set_author(Person.parse(value))
- elif key == 'committer':
- cd = cd.set_committer(Person.parse(value))
- assert False
+ cd = cd.set_message('\n'.join(raw_lines[i+1:]))
+ break
+ if line.startswith(' '):
+ # continuation line
+ lines[-1] += '\n' + line[1:]
+ else:
+ lines.append(line)
+ for line in lines:
+ if ' ' in line:
+ key, value = line.split(' ', 1)
+ if key == 'tree':
+ cd = cd.set_tree(repository.get_tree(value))
+ elif key == 'parent':
+ cd = cd.add_parent(repository.get_commit(value))
+ elif key == 'author':
+ cd = cd.set_author(Person.parse(value))
+ elif key == 'committer':
+ cd = cd.set_committer(Person.parse(value))
+ return cd
+
class Commit(GitObject):
"""Represents a git commit object. All the actual data contents of the