diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-11 12:38:33 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-11 12:38:33 -0500 |
commit | bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5 (patch) | |
tree | d6868a7eb4a8954017e24c0fa0b5f2345188e4bd | |
parent | ea55cc54be17c478e7307f8d4eb640988d7822f9 (diff) | |
download | korg-helpers-bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5.tar.gz |
Properly decode non-utf8 message content
Actually pay attention to what the charset says in the message headers
for the few holdouts who are still sending things as iso-8859-1.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | get-lore-mbox.py | 9 |
1 files changed, 8 insertions, 1 deletions
```diff
diff --git a/get-lore-mbox.py b/get-lore-mbox.py
index 63f8838..a548341 100755
--- a/get-lore-mbox.py
+++ b/get-lore-mbox.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
 # -*- coding: utf-8 -*-
 #
 __author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
@@ -174,6 +175,9 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs):
 
 def get_plain_part(msg, ensurediff=False):
     # walk until we find the first text/plain part
+    mcharset = msg.get_content_charset()
+    if not mcharset:
+        mcharset = 'utf-8'
     body = None
     for part in msg.walk():
         if part.get_content_type().find('text/plain') < 0:
@@ -181,7 +185,10 @@ def get_plain_part(msg, ensurediff=False):
         body = part.get_payload(decode=True)
         if body is None:
             continue
-        body = body.decode('utf-8', errors='replace')
+        pcharset = part.get_content_charset()
+        if not pcharset:
+            pcharset = mcharset
+        body = body.decode(pcharset, errors='replace')
         if ensurediff and not body_contains_diff(body):
             continue
         break
```