aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin Ryabitsev <konstantin@linuxfoundation.org>2024-03-19 11:11:34 -0400
committerKonstantin Ryabitsev <konstantin@linuxfoundation.org>2024-03-19 11:11:34 -0400
commitcf5f35d6760b7b1d7187edadbfabcb02882fc676 (patch)
tree7e435f2d7f2ce09e59e78245314ee2f2bd20798e
parent6345d799d294b5de8257939c9672ab9d11b5359b (diff)
downloadb4-cf5f35d6760b7b1d7187edadbfabcb02882fc676.tar.gz
send-receive: add workarounds for python email bugs
Backport workarounds we've implemented for Python's email header bugs that was causing the endpoint to wrap long message-ids. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r--misc/send-receive.py102
1 files changed, 98 insertions, 4 deletions
diff --git a/misc/send-receive.py b/misc/send-receive.py
index bbb4087..cbd8aa8 100644
--- a/misc/send-receive.py
+++ b/misc/send-receive.py
@@ -12,19 +12,23 @@ import smtplib
import email
import email.header
import email.policy
+import email.quoprimime
import re
import ezpi
import copy
+import textwrap
from configparser import ConfigParser, ExtendedInterpolation
from string import Template
from email import utils
-from typing import Tuple, Union
+from typing import Tuple, Union, List
from email import charset
charset.add_charset('utf-8', None)
emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None)
+qspecials = re.compile(r'[()<>@,:;.\"\[\]]')
+
DB_VERSION = 1
logger = logging.getLogger('b4-send-receive')
@@ -185,7 +189,7 @@ class SendReceiveListener(object):
body += '\n'
cmsg.set_payload(body, charset='utf-8')
cmsg.set_charset('utf-8')
- bdata = cmsg.as_bytes(policy=email.policy.SMTP)
+ bdata = self.get_msg_as_bytes(cmsg, headers='encode')
destaddrs = [identity]
alwaysbcc = self._config['main'].get('alwayscc')
if alwaysbcc:
@@ -299,6 +303,96 @@ class SendReceiveListener(object):
new_hdrval = re.sub(r'\n?\s+', ' ', decoded)
return new_hdrval.strip()
+ def format_addrs(self, pairs: List[Tuple[str, str]], clean: bool = True) -> str:
+ addrs = list()
+ for pair in pairs:
+ if pair[0] == pair[1]:
+ addrs.append(pair[1])
+ continue
+ if clean:
+ # Remove any quoted-printable header junk from the name
+ pair = (self.clean_header(pair[0]), pair[1])
+ # Work around https://github.com/python/cpython/issues/100900
+ if not pair[0].startswith('=?') and not pair[0].startswith('"') and qspecials.search(pair[0]):
+ quoted = email.utils.quote(pair[0])
+ addrs.append(f'"{quoted}" <{pair[1]}>')
+ continue
+ addrs.append(email.utils.formataddr(pair))
+ return ', '.join(addrs)
+
+ def isascii(self, strval: str) -> bool:
+ try:
+ return strval.isascii()
+ except AttributeError:
+ return all([ord(c) < 128 for c in strval])
+
+ def wrap_header(self, hdr, width: int = 75, nl: str = '\r\n', transform: str = 'preserve') -> bytes:
+ hname, hval = hdr
+ if hname.lower() in ('to', 'cc', 'from', 'x-original-from'):
+ _parts = [f'{hname}: ']
+ first = True
+ for addr in email.utils.getaddresses([hval]):
+ if transform == 'encode' and not self.isascii(addr[0]):
+ addr = (email.quoprimime.header_encode(addr[0].encode(), charset='utf-8'), addr[1])
+ qp = self.format_addrs([addr], clean=False)
+ elif transform == 'decode':
+ qp = self.format_addrs([addr], clean=True)
+ else:
+ qp = self.format_addrs([addr], clean=False)
+ # See if there is enough room on the existing line
+ if first:
+ _parts[-1] += qp
+ first = False
+ continue
+ if len(_parts[-1] + ', ' + qp) > width:
+ _parts[-1] += ', '
+ _parts.append(qp)
+ continue
+ _parts[-1] += ', ' + qp
+ else:
+ if transform == 'decode' and hval.find('?=') >= 0:
+ hdata = f'{hname}: ' + self.clean_header(hval)
+ else:
+ hdata = f'{hname}: {hval}'
+ if transform != 'encode' or self.isascii(hval):
+ if len(hdata) <= width:
+ return hdata.encode()
+ # Use simple textwrap, with a small trick that ensures that long non-breakable
+ # strings don't show up on the next line from the bare header
+ hdata = hdata.replace(': ', ':_', 1)
+ wrapped = textwrap.wrap(hdata, break_long_words=False, break_on_hyphens=False,
+ subsequent_indent=' ', width=width)
+ return nl.join(wrapped).replace(':_', ': ', 1).encode()
+
+ qp = f'{hname}: ' + email.quoprimime.header_encode(hval.encode(), charset='utf-8')
+ # is it longer than width?
+ if len(qp) <= width:
+ return qp.encode()
+
+ _parts = list()
+ while len(qp) > width:
+ wrapat = width - 2
+ if len(_parts):
+ # Also allow for the ' ' at the front on continuation lines
+ wrapat -= 1
+ # Make sure we don't break on a =XX escape sequence
+ while '=' in qp[wrapat - 2:wrapat]:
+ wrapat -= 1
+ _parts.append(qp[:wrapat] + '?=')
+ qp = ('=?utf-8?q?' + qp[wrapat:])
+ _parts.append(qp)
+ return f'{nl} '.join(_parts).encode()
+
+ def get_msg_as_bytes(self, msg: email.message.Message, nl: str = '\r\n', headers: str = 'preserve') -> bytes:
+ bdata = b''
+ for hname, hval in msg.items():
+ bdata += self.wrap_header((hname, str(hval)), nl=nl, transform=headers) + nl.encode()
+ bdata += nl.encode()
+ payload = msg.get_payload(decode=True)
+ for bline in payload.split(b'\n'):
+ bdata += re.sub(rb'[\r\n]*$', b'', bline) + nl.encode()
+ return bdata
+
def receive(self, jdata, resp, reflect: bool = False) -> None:
servicename = self._config['main'].get('myname')
if not servicename:
@@ -426,7 +520,7 @@ class SendReceiveListener(object):
repo = None
if reflect:
- logger.info('Reflecting %s messages back to %s', len(msgs), identity, selector)
+ logger.info('Reflecting %s messages back to %s', len(msgs), identity)
sentaction = 'Reflected'
else:
logger.info('Sending %s messages for %s/%s', len(msgs), identity, selector)
@@ -501,7 +595,7 @@ class SendReceiveListener(object):
destaddrs.update(bccaddrs)
if not self._config['main'].getboolean('dryrun'):
- bdata = msg.as_bytes(policy=email.policy.SMTP)
+ bdata = self.get_msg_as_bytes(msg, headers='encode')
if reflect:
smtp.sendmail(fromaddr, [identity], bdata)
else: