Properly decode non-utf8 message content

Actually pay attention to what the charset says in the message headers for the few holdouts who are still sending things as iso-8859-1.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-02-11 12:38:33 -0500
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-02-11 12:38:33 -0500
commit: bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5 (patch)
tree: d6868a7eb4a8954017e24c0fa0b5f2345188e4bd
parent: ea55cc54be17c478e7307f8d4eb640988d7822f9 (diff)
download: korg-helpers-bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5.tar.gz
1 files changed, 8 insertions, 1 deletions
diff --git a/get-lore-mbox.py b/get-lore-mbox.py
index 63f8838..a548341 100755
--- a/get-lore-mbox.py
+++ b/get-lore-mbox.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
 # -*- coding: utf-8 -*-
 #
 __author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
@@ -174,6 +175,9 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs):
 
 def get_plain_part(msg, ensurediff=False):
     # walk until we find the first text/plain part
+    mcharset = msg.get_content_charset()
+    if not mcharset:
+        mcharset = 'utf-8'
     body = None
     for part in msg.walk():
         if part.get_content_type().find('text/plain') < 0:
@@ -181,7 +185,10 @@ def get_plain_part(msg, ensurediff=False):
         body = part.get_payload(decode=True)
         if body is None:
             continue
-        body = body.decode('utf-8', errors='replace')
+        pcharset = part.get_content_charset()
+        if not pcharset:
+            pcharset = mcharset
+        body = body.decode(pcharset, errors='replace')
         if ensurediff and not body_contains_diff(body):
             continue
         break
author	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2020-02-11 12:38:33 -0500
committer	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2020-02-11 12:38:33 -0500
commit	bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5 (patch)
tree	d6868a7eb4a8954017e24c0fa0b5f2345188e4bd
parent	ea55cc54be17c478e7307f8d4eb640988d7822f9 (diff)
download	korg-helpers-bb81a9edd5cb32cd900cacc51a7e76acfdd1b5b5.tar.gz