about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2022-11-24 14:57:44 -0500
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2022-11-24 14:57:44 -0500
commit ca3e8d7a8c087a9cd6a60bf3c3efc06f2dbe91e7 (patch)
tree c744b89b9efc087b94a5ecb0732782886f8a8491
parent d416f2e37ef3164de0a14df4af5381e06553847c (diff)
download b4-ca3e8d7a8c087a9cd6a60bf3c3efc06f2dbe91e7.tar.gz
Use BytesGenerator for saving am-ready mboxes
Switch to using BytesGenerator when saving mboxes for git-am consumption. This fixes the problem of the default generator forcing headers to 7-bit.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 29
-rw-r--r-- b4/ez.py 4
-rw-r--r-- b4/mbox.py 12
-rw-r--r-- b4/pr.py 2
4 files changed, 25 insertions, 22 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index e4e2ffe..be2aaf6 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -31,10 +31,11 @@ import requests
from pathlib import Path
from contextlib import contextmanager
-from typing import Optional, Tuple, Set, List, TextIO, Union, Sequence
+from typing import Optional, Tuple, Set, List, BinaryIO, Union, Sequence
from email import charset
charset.add_charset('utf-8', None)
+# Policy we use for saving mail locally
emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None)
try:
@@ -1204,7 +1205,7 @@ class LoreMessage:
self.msg._headers.append((hn, hval)) # noqa
try:
- res = dkim.verify(self.msg.as_bytes(), logger=dkimlogger)
+ res = dkim.verify(self.msg.as_bytes(policy=email.policy.SMTP), logger=dkimlogger)
logger.debug('DKIM verify results: %s=%s', identity, res)
except Exception as ex: # noqa
# Usually, this is due to some DNS resolver failure, which we can't
@@ -1234,7 +1235,7 @@ class LoreMessage:
if not matches:
return
bl = int(matches.groups()[0])
- i, m, p = get_mailinfo(self.msg.as_bytes(), scissors=False)
+ i, m, p = get_mailinfo(self.msg.as_bytes(policy=emlpolicy), scissors=False)
bb = b''
for line in re.sub(rb'[\r\n]*$', b'', m + p).split(b'\n'):
bb += re.sub(rb'[\r\n]*$', b'', line) + b'\r\n'
@@ -1274,7 +1275,7 @@ class LoreMessage:
success = False
trim_body = False
while True:
- attestations = patatt.validate_message(self.msg.as_bytes(), sources, trim_body=trim_body)
+ attestations = patatt.validate_message(self.msg.as_bytes(policy=emlpolicy), sources, trim_body=trim_body)
# Do we have any successes?
for attestation in attestations:
if attestation[0] == patatt.RES_VALID:
@@ -2549,7 +2550,7 @@ def mailsplit_bytes(bmbox: bytes, outdir: str, pipesep: Optional[str] = None) ->
pipesep = codecs.decode(pipesep.encode(), 'unicode_escape')
for chunk in bmbox.split(pipesep.encode()):
if chunk.strip():
- msgs.append(email.message_from_bytes(chunk))
+ msgs.append(email.message_from_bytes(chunk, policy=emlpolicy))
return msgs
logger.debug('Mailsplitting the mbox into %s', outdir)
@@ -2561,7 +2562,7 @@ def mailsplit_bytes(bmbox: bytes, outdir: str, pipesep: Optional[str] = None) ->
# Read in the files
for msg in os.listdir(outdir):
with open(os.path.join(outdir, msg), 'rb') as fh:
- msgs.append(email.message_from_binary_file(fh))
+ msgs.append(email.message_from_binary_file(fh, policy=emlpolicy))
return msgs
@@ -2579,7 +2580,7 @@ def get_pi_search_results(query: str, nocache: bool = False) -> Optional[List[em
logger.debug('Using cached copy: %s', cachedir)
for msg in os.listdir(cachedir):
with open(os.path.join(cachedir, msg), 'rb') as fh:
- msgs.append(email.message_from_binary_file(fh))
+ msgs.append(email.message_from_binary_file(fh, policy=emlpolicy))
return msgs
loc = urllib.parse.urlparse(query_url)
@@ -2623,7 +2624,7 @@ def split_and_dedupe_pi_results(t_mbox: bytes, cachedir: Optional[str] = None) -
pathlib.Path(cachedir).mkdir(parents=True, exist_ok=True)
for at, msg in enumerate(msgs):
with open(os.path.join(cachedir, '%04d' % at), 'wb') as fh:
- fh.write(msg.as_bytes())
+ fh.write(msg.as_bytes(policy=emlpolicy))
return msgs
@@ -2635,7 +2636,7 @@ def get_pi_thread_by_url(t_mbx_url: str, nocache: bool = False):
logger.debug('Using cached copy: %s', cachedir)
for msg in os.listdir(cachedir):
with open(os.path.join(cachedir, msg), 'rb') as fh:
- msgs.append(email.message_from_binary_file(fh))
+ msgs.append(email.message_from_binary_file(fh, policy=emlpolicy))
return msgs
logger.critical('Grabbing thread from %s', t_mbx_url.split('://')[1])
@@ -2726,9 +2727,11 @@ def git_range_to_patches(gitdir: Optional[str], start: str, end: str,
commit], decode=False)
if ecode > 0:
raise RuntimeError(f'Could not get a patch out of {commit}')
- msg = email.message_from_bytes(out)
+ msg = email.message_from_bytes(out, policy=emlpolicy)
msg.set_charset('utf-8')
- msg.replace_header('Content-Transfer-Encoding', '8bit')
+ # Clean subject and From to remove any 7bit-safe encoding
+ msg.replace_header('From', LoreMessage.clean_header(msg.get('From')))
+ msg.replace_header('Subject', LoreMessage.clean_header(msg.get('Subject')))
logger.debug(' %s', msg.get('Subject'))
patches.append((commit, msg))
@@ -2989,13 +2992,13 @@ def get_gpg_uids(keyid: str) -> list:
return uids
-def save_git_am_mbox(msgs: list, dest: TextIO):
+def save_git_am_mbox(msgs: list, dest: BinaryIO):
# Git-am has its own understanding of what "mbox" format is that differs from Python's
# mboxo implementation. Specifically, it never escapes the ">From " lines found in bodies
# unless invoked with --patch-format=mboxrd (this is wrong, because ">From " escapes are also
# required in the original mbox "mboxo" format).
# So, save in the format that git-am expects
- gen = email.generator.Generator(dest, policy=emlpolicy)
+ gen = email.generator.BytesGenerator(dest, policy=emlpolicy)
for msg in msgs:
msg.set_unixfrom('From git@z Thu Jan 1 00:00:00 1970')
gen.flatten(msg, unixfrom=True)
diff --git a/b4/ez.py b/b4/ez.py
index c529ca1..2606d1f 100644
--- a/b4/ez.py
+++ b/b4/ez.py
@@ -482,9 +482,9 @@ def start_new_series(cmdargs: argparse.Namespace) -> None:
if patches:
logger.info('Applying %s patches', len(patches))
logger.info('---')
- ifh = io.StringIO()
+ ifh = io.BytesIO()
b4.save_git_am_mbox(patches, ifh)
- ambytes = ifh.getvalue().encode()
+ ambytes = ifh.getvalue()
ecode, out = b4.git_run_command(None, ['am'], stdin=ambytes, logstderr=True)
logger.info(out.strip())
if ecode > 0:
diff --git a/b4/mbox.py b/b4/mbox.py
index c5309ae..250b8b7 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -193,11 +193,11 @@ def make_am(msgs: List[email.message.Message], cmdargs: argparse.Namespace, msgi
if save_maildir:
b4.save_maildir(am_msgs, am_filename)
else:
- with open(am_filename, 'w') as fh:
+ with open(am_filename, 'wb') as fh:
b4.save_git_am_mbox(am_msgs, fh)
else:
am_cover = None
- b4.save_git_am_mbox(am_msgs, sys.stdout)
+ b4.save_git_am_mbox(am_msgs, sys.stdout.buffer)
if lser.has_cover and not cmdargs.nocover:
lser.save_cover(am_cover)
@@ -247,9 +247,9 @@ def make_am(msgs: List[email.message.Message], cmdargs: argparse.Namespace, msgi
if not topdir:
logger.critical('Could not figure out where your git dir is, cannot shazam.')
sys.exit(1)
- ifh = io.StringIO()
+ ifh = io.BytesIO()
b4.save_git_am_mbox(am_msgs, ifh)
- ambytes = ifh.getvalue().encode()
+ ambytes = ifh.getvalue()
if not cmdargs.makefetchhead:
amflags = config.get('shazam-am-flags', '')
sp = shlex.shlex(amflags, posix=True)
@@ -668,7 +668,7 @@ def main(cmdargs: argparse.Namespace) -> None:
logger.info('%s messages in the thread', len(msgs))
if cmdargs.outdir == '-':
logger.info('---')
- b4.save_git_am_mbox(msgs, sys.stdout)
+ b4.save_git_am_mbox(msgs, sys.stdout.buffer)
return
# Check if outdir is a maildir
@@ -712,7 +712,7 @@ def main(cmdargs: argparse.Namespace) -> None:
logger.info('Saved maildir %s', savename)
return
- with open(savename, 'w') as fh:
+ with open(savename, 'wb') as fh:
b4.save_git_am_mbox(msgs, fh)
logger.info('Saved %s', savename)
diff --git a/b4/pr.py b/b4/pr.py
index 9c4b7de..f19d10b 100644
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -554,7 +554,7 @@ def main(cmdargs):
if save_maildir:
b4.save_maildir(msgs, savefile)
else:
- with open(savefile, 'w') as fh:
+ with open(savefile, 'wb') as fh:
b4.save_git_am_mbox(msgs, fh)
logger.info('---')
logger.info('Saved %s', savefile)