From 61120dd4e85422d01eccb67a3324c09916a0ea99 Mon Sep 17 00:00:00 2001 From: revsuine Date: Tue, 12 Nov 2024 19:00:56 +0000 Subject: [PATCH 01/31] revert gpgmymail to 40b3ba376024d31386916e1f11438a2f1a905c6f --- gpgmymail | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/gpgmymail b/gpgmymail index 7062dcf..a3c230f 100755 --- a/gpgmymail +++ b/gpgmymail @@ -40,13 +40,6 @@ import gnupg # constants DEFAULT_ENCODING='utf-8' # default is latin-1 which fails w some unicode chars -CTE_TO_ENCODER_DICT = { - "7bit": email.encoders.encode_7or8bit, - "8bit": email.encoders.encode_7or8bit, - "base64": email.encoders.encode_base64, - "quoted-printable": email.encoders.encode_quopri -} -DEFAULT_ENCODER = email.encoders.encode_7or8bit def is_message_encrypted(message: email.message.Message) -> bool: """Determines whether or not an email message is encrypted. @@ -56,24 +49,6 @@ def is_message_encrypted(message: email.message.Message) -> bool: return message.get_content_subtype() == "encrypted" -def get_encoder_from_msg(msg: email.message.Message) -> typing.Callable: - """ - Return a suitable encoder function from email.encoders based on an input - message. If the input message has no Content-Transfer-Encoding header, - or there is no encoder function corresponding to the CTE header, a default - encoder will be returned. - - :param msg: an unencrypted email Message - :return: function from email.encoders, see - https://docs.python.org/3/library/email.encoders.html - """ - cte = msg.get("Content-Transfer-Encoding") - if cte: - encoder = CTE_TO_ENCODER_DICT.get(cte) - else: - return DEFAULT_ENCODER - return encoder if encoder else DEFAULT_ENCODER - def encrypt( message: email.message.Message, recipients: typing.List[str], @@ -107,13 +82,13 @@ def encrypt( enc = email.mime.application.MIMEApplication( _data=str(encrypted_content).encode(), _subtype="octet-stream", - _encoder=get_encoder_from_msg(message) + _encoder=email.encoders.encode_7or8bit ) control = email.mime.application.MIMEApplication( _data=b'Version: 1\n', _subtype='pgp-encrypted; name="msg.asc"', - _encoder=get_encoder_from_msg(message) + _encoder=email.encoders.encode_7or8bit ) control['Content-Disposition'] = 'inline; filename="msg.asc"' From ed57c8f0d46a4369d9bf30dc7c711e45fd7d57a8 Mon Sep 17 00:00:00 2001 From: revsuine Date: Wed, 13 Nov 2024 17:22:21 +0000 Subject: [PATCH 02/31] first attempt to implement decoding as 7or8bit (non-working) --- gpgmymail | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gpgmymail b/gpgmymail index a3c230f..8b51754 100755 --- a/gpgmymail +++ b/gpgmymail @@ -49,6 +49,23 @@ def is_message_encrypted(message: email.message.Message) -> bool: return message.get_content_subtype() == "encrypted" +def decode_email(message: email.message.Message) -> email.message.Message: + """Turn a quoted-printable or base64 encoded email into a 7or8bit encoded + email + + :param message: email.message.Message to be decoded + :return: decoded email.message.Message""" + payload = [] + + for part in message.walk(): + if not part.is_multipart(): + payload.append(part.get_payload(decode=True)) + + message["Content-Transfer-Encoding"] = "7bit" + message.set_payload(payload) + + return message + def encrypt( message: email.message.Message, recipients: typing.List[str], @@ -67,10 +84,14 @@ def encrypt( :return: The encrypted email as a string""" + # exclusion criteria: # some mail clients like Thunderbird don't like twice-encrypted emails, # so we return the message as-is if it's already encrypted if is_message_encrypted(message) and not unconditionally_encrypt: return message.as_string() + + # make necessary changes to message + message = decode_email(message) gpg = gnupg.GPG() gpg.encoding = encoding From f895e87ec03e30913b04794f5d51ef2e0387e27b Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 15:19:57 +0000 Subject: [PATCH 03/31] account for non-multipart emails --- gpgmymail | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gpgmymail b/gpgmymail index 8b51754..21c46e6 100755 --- a/gpgmymail +++ b/gpgmymail @@ -55,14 +55,17 @@ def decode_email(message: email.message.Message) -> email.message.Message: :param message: email.message.Message to be decoded :return: decoded email.message.Message""" - payload = [] + if message.is_multipart(): + payload = [] + for part in message.walk(): + if not part.is_multipart(): + payload.append(part.get_payload(decode=True)) - for part in message.walk(): - if not part.is_multipart(): - payload.append(part.get_payload(decode=True)) + message.set_payload(payload) + else: + message.set_payload(message.get_payload(decode=True)) message["Content-Transfer-Encoding"] = "7bit" - message.set_payload(payload) return message From b78f1f04af81196538523896888bdbf5d6d445a3 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 15:59:20 +0000 Subject: [PATCH 04/31] more testing scripts --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 68ecb9b..6e12153 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ venv/ .idea/ -testing.py +testing*.py From 8755e0e1cc81b761f2cb291f2fa25f6162df8482 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 17:39:25 +0000 Subject: [PATCH 05/31] gitignore some testing files --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 6e12153..5deb70a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,9 @@ venv/ +__pycache__/ .idea/ +# misc testing scripts testing*.py +# i have a symlink here so testing scripts can import gpgmymail +# as python expects `import gpgmymail` to be importing `gpgmymail.py` +gpgmymail.py From e7a8f40b915b7d169b95e63cfb1ad2d98b0a150e Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:13:16 +0000 Subject: [PATCH 06/31] first attempt to implement decoding as 7or8bit through quopri, base64, and byte replacement --- gpgmymail | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/gpgmymail b/gpgmymail index 21c46e6..2010fc3 100755 --- a/gpgmymail +++ b/gpgmymail @@ -34,12 +34,16 @@ import email.mime.application import email.mime.multipart import email.mime.message import typing +# for decode_email: +import quopri +import base64 # see: https://gnupg.readthedocs.io/en/latest/ import gnupg # constants DEFAULT_ENCODING='utf-8' # default is latin-1 which fails w some unicode chars +CTES_TO_BE_DECODED = ("quoted-printable", "base64") def is_message_encrypted(message: email.message.Message) -> bool: """Determines whether or not an email message is encrypted. @@ -55,19 +59,43 @@ def decode_email(message: email.message.Message) -> email.message.Message: :param message: email.message.Message to be decoded :return: decoded email.message.Message""" - if message.is_multipart(): - payload = [] - for part in message.walk(): - if not part.is_multipart(): - payload.append(part.get_payload(decode=True)) + # this is a kinda hacky way to do this by manipulating the message as a + # string but i couldn't get it to work any other way - message.set_payload(payload) - else: - message.set_payload(message.get_payload(decode=True)) + msg_ctes = message.get_all("Content-Transfer-Encoding") + + # this list will be populated with any encoding that needs to be decoded, + # e.g. base64 + # empty if no decoding needed + # set used to avoid dupes + decodes_needed = set() + # check if any of the parts of the message need decoding + for cte in CTES_TO_BE_DECODED: + if cte in msg_ctes: + decodes_needed.add(cte) + # no decoding needed, go ahead with message + if not decodes_needed: + return message - message["Content-Transfer-Encoding"] = "7bit" + # decoding needed: + # as_string() gives us str, encode() gives us bytes + decoded_bytes = msg.as_string().encode() + if "quoted-printable" in decodes_needed: + decoded_bytes = quopri.decodestring(decoded_bytes) + if "base64" in decodes_needed: + decoded_bytes = base64.b64decode(decoded_bytes) - return message + # replace any instances of the Content-Transfer-Encoding header + decoded_bytes = decoded_bytes.replace( + b'Content-Transfer-Encoding: quoted-printable', + b'Content-Transfer-Encoding: 7bit' + ) + decoded_bytes = decoded_bytes.replace( + b'Content-Transfer-Encoding: base64', + b'Content-Transfer-Encoding: 7bit' + ) + + return email.message_from_bytes(decoded_bytes) def encrypt( message: email.message.Message, From 2955e990dadb8f2b493059b6d4bc624e2ace529f Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:15:06 +0000 Subject: [PATCH 07/31] var name typo --- gpgmymail | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpgmymail b/gpgmymail index 2010fc3..0164660 100755 --- a/gpgmymail +++ b/gpgmymail @@ -79,7 +79,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: # decoding needed: # as_string() gives us str, encode() gives us bytes - decoded_bytes = msg.as_string().encode() + decoded_bytes = message.as_string().encode() if "quoted-printable" in decodes_needed: decoded_bytes = quopri.decodestring(decoded_bytes) if "base64" in decodes_needed: From a21ee759c87e4419ec0bb34f5d05af8f482ee953 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:26:22 +0000 Subject: [PATCH 08/31] temp remove base64 decoding and unconditionally quopri decode --- gpgmymail | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/gpgmymail b/gpgmymail index 0164660..d7575d9 100755 --- a/gpgmymail +++ b/gpgmymail @@ -36,14 +36,12 @@ import email.mime.message import typing # for decode_email: import quopri -import base64 # see: https://gnupg.readthedocs.io/en/latest/ import gnupg # constants DEFAULT_ENCODING='utf-8' # default is latin-1 which fails w some unicode chars -CTES_TO_BE_DECODED = ("quoted-printable", "base64") def is_message_encrypted(message: email.message.Message) -> bool: """Determines whether or not an email message is encrypted. @@ -61,39 +59,19 @@ def decode_email(message: email.message.Message) -> email.message.Message: :return: decoded email.message.Message""" # this is a kinda hacky way to do this by manipulating the message as a # string but i couldn't get it to work any other way - - msg_ctes = message.get_all("Content-Transfer-Encoding") - - # this list will be populated with any encoding that needs to be decoded, - # e.g. base64 - # empty if no decoding needed - # set used to avoid dupes - decodes_needed = set() - # check if any of the parts of the message need decoding - for cte in CTES_TO_BE_DECODED: - if cte in msg_ctes: - decodes_needed.add(cte) - # no decoding needed, go ahead with message - if not decodes_needed: - return message - # decoding needed: # as_string() gives us str, encode() gives us bytes decoded_bytes = message.as_string().encode() - if "quoted-printable" in decodes_needed: - decoded_bytes = quopri.decodestring(decoded_bytes) - if "base64" in decodes_needed: - decoded_bytes = base64.b64decode(decoded_bytes) + decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header decoded_bytes = decoded_bytes.replace( b'Content-Transfer-Encoding: quoted-printable', b'Content-Transfer-Encoding: 7bit' ) - decoded_bytes = decoded_bytes.replace( - b'Content-Transfer-Encoding: base64', - b'Content-Transfer-Encoding: 7bit' - ) + + # TODO: base64 decoding, which is more difficult due to the need to not + # treat the whole email like it's base64 return email.message_from_bytes(decoded_bytes) From 82c5144e58807ce026fe055fb823c90f90f59e34 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:44:28 +0000 Subject: [PATCH 09/31] implement really hacky way of decoding b64 parts --- gpgmymail | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/gpgmymail b/gpgmymail index d7575d9..708d9d2 100755 --- a/gpgmymail +++ b/gpgmymail @@ -65,13 +65,36 @@ def decode_email(message: email.message.Message) -> email.message.Message: decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header + # quopri version, we do base64 version down there decoded_bytes = decoded_bytes.replace( b'Content-Transfer-Encoding: quoted-printable', b'Content-Transfer-Encoding: 7bit' ) - # TODO: base64 decoding, which is more difficult due to the need to not - # treat the whole email like it's base64 + # REALLY hacky but i had issues with the more sensible ways to do this. + # iterates through a Message object to find CTEs of base64 + # gets the b64 payload and the decoded payload + # then find and replaces in decoded_bytes the b64 payload + # with the decoded payload + # lol + quopri_decoded_message = email.message_from_bytes(decoded_bytes) + if quopri_decoded_message.is_multipart(): + for part in quopri_decoded_message.walk(): + if not part.is_multipart(): + if part.get("Content-Transfer-Encoding") == "base64": + b64_str = part.get_payload() + decoded_b64_str = part.get_payload(decode=True) + decoded_bytes = decoded_bytes.replace( + b64_str, + decoded_b64_str + ) + else: + # TODO + + decoded_bytes = decoded_bytes.replace( + b'Content-Transfer-Encoding: base64', + b'Content-Transfer-Encoding: 7bit' + ) return email.message_from_bytes(decoded_bytes) From 158f5356a1b660445cfee022624e2544bb39f3a3 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:45:30 +0000 Subject: [PATCH 10/31] syntax error fix --- gpgmymail | 1 + 1 file changed, 1 insertion(+) diff --git a/gpgmymail b/gpgmymail index 708d9d2..1d23a23 100755 --- a/gpgmymail +++ b/gpgmymail @@ -90,6 +90,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: ) else: # TODO + pass decoded_bytes = decoded_bytes.replace( b'Content-Transfer-Encoding: base64', From 9a3a1f2cb73ea71245e15292429f31db3f282bfc Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:46:30 +0000 Subject: [PATCH 11/31] encode the find and replaces for b64 decode --- gpgmymail | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpgmymail b/gpgmymail index 1d23a23..c725fbf 100755 --- a/gpgmymail +++ b/gpgmymail @@ -82,8 +82,8 @@ def decode_email(message: email.message.Message) -> email.message.Message: for part in quopri_decoded_message.walk(): if not part.is_multipart(): if part.get("Content-Transfer-Encoding") == "base64": - b64_str = part.get_payload() - decoded_b64_str = part.get_payload(decode=True) + b64_str = part.get_payload().encode() + decoded_b64_str = part.get_payload(decode=True).encode() decoded_bytes = decoded_bytes.replace( b64_str, decoded_b64_str From b213a0ab52c6b645c566722979643d6d22c5a394 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:52:18 +0000 Subject: [PATCH 12/31] don't encode decoded --- gpgmymail | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpgmymail b/gpgmymail index c725fbf..84fefb4 100755 --- a/gpgmymail +++ b/gpgmymail @@ -83,7 +83,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: if not part.is_multipart(): if part.get("Content-Transfer-Encoding") == "base64": b64_str = part.get_payload().encode() - decoded_b64_str = part.get_payload(decode=True).encode() + decoded_b64_str = part.get_payload(decode=True) decoded_bytes = decoded_bytes.replace( b64_str, decoded_b64_str From 6590baafd19ae3ff422dc6d4daede19d60f32d0c Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 18:55:06 +0000 Subject: [PATCH 13/31] try do this by replacing entire payload --- gpgmymail | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gpgmymail b/gpgmymail index 84fefb4..41f6813 100755 --- a/gpgmymail +++ b/gpgmymail @@ -60,8 +60,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: # this is a kinda hacky way to do this by manipulating the message as a # string but i couldn't get it to work any other way # decoding needed: - # as_string() gives us str, encode() gives us bytes - decoded_bytes = message.as_string().encode() + decoded_bytes = message.as_bytes() decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header @@ -82,11 +81,12 @@ def decode_email(message: email.message.Message) -> email.message.Message: for part in quopri_decoded_message.walk(): if not part.is_multipart(): if part.get("Content-Transfer-Encoding") == "base64": - b64_str = part.get_payload().encode() - decoded_b64_str = part.get_payload(decode=True) + new_part = part + new_part.replace_header("Content-Transfer-Encoding", "7bit") + new_part.set_payload(part.get_payload(decode=True)) decoded_bytes = decoded_bytes.replace( - b64_str, - decoded_b64_str + part.as_bytes(), + new_part.as_bytes() ) else: # TODO From 10f25158bfeb450ed7e03d57e43128f46a06caae Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:05:21 +0000 Subject: [PATCH 14/31] try to decode w/o boundary --- gpgmymail | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/gpgmymail b/gpgmymail index 41f6813..d5e3033 100755 --- a/gpgmymail +++ b/gpgmymail @@ -36,6 +36,7 @@ import email.mime.message import typing # for decode_email: import quopri +import base64 # see: https://gnupg.readthedocs.io/en/latest/ import gnupg @@ -60,7 +61,8 @@ def decode_email(message: email.message.Message) -> email.message.Message: # this is a kinda hacky way to do this by manipulating the message as a # string but i couldn't get it to work any other way # decoding needed: - decoded_bytes = message.as_bytes() + # as_string() gives us str, encode() gives us bytes + decoded_bytes = message.as_string().encode() decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header @@ -78,15 +80,20 @@ def decode_email(message: email.message.Message) -> email.message.Message: # lol quopri_decoded_message = email.message_from_bytes(decoded_bytes) if quopri_decoded_message.is_multipart(): + most_recent_boundary = None for part in quopri_decoded_message.walk(): - if not part.is_multipart(): + # multipart and has boundary (not None) + if part.is_multipart() and part.get_boundary(): + most_recent_boundary = part.get_boundary() + else: if part.get("Content-Transfer-Encoding") == "base64": - new_part = part - new_part.replace_header("Content-Transfer-Encoding", "7bit") - new_part.set_payload(part.get_payload(decode=True)) + b64_str = part.get_payload() + # remove the boundary as we don't want to change this + b64_str = b64_str.replace(most_recent_boundary, "") + decoded_b64_str = base64.b64decode(b64_str) decoded_bytes = decoded_bytes.replace( - part.as_bytes(), - new_part.as_bytes() + b64_str, + decoded_b64_str ) else: # TODO From f559fef2ed11e69f766e928bbc3a927135ea5eed Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:15:37 +0000 Subject: [PATCH 15/31] strip trailing hyphens from base64 --- gpgmymail | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gpgmymail b/gpgmymail index d5e3033..4329858 100755 --- a/gpgmymail +++ b/gpgmymail @@ -36,7 +36,6 @@ import email.mime.message import typing # for decode_email: import quopri -import base64 # see: https://gnupg.readthedocs.io/en/latest/ import gnupg @@ -90,7 +89,13 @@ def decode_email(message: email.message.Message) -> email.message.Message: b64_str = part.get_payload() # remove the boundary as we don't want to change this b64_str = b64_str.replace(most_recent_boundary, "") - decoded_b64_str = base64.b64decode(b64_str) + # sometimes we have leftover hyphens from a boundary, so strip: + # hyphens not in base64 so we know not to use them + # strip whitespace first + b64_str = b64_str.strip() + b64_str = b64_str.strip('-') + b64_str = b64_str.encode() # turn into bytes-like object + decoded_b64_str = part.get_payload(decode=True) decoded_bytes = decoded_bytes.replace( b64_str, decoded_b64_str From f1a07cb1e0af372b11ebc5d756aa3a22a1e1ddac Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:23:51 +0000 Subject: [PATCH 16/31] use message_from_string instead of message_from_bytes cause message_from_bytes bizarrely changes it back to base64 --- gpgmymail | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gpgmymail b/gpgmymail index 4329858..e7c6073 100755 --- a/gpgmymail +++ b/gpgmymail @@ -61,7 +61,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: # string but i couldn't get it to work any other way # decoding needed: # as_string() gives us str, encode() gives us bytes - decoded_bytes = message.as_string().encode() + decoded_bytes = message.as_bytes() decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header @@ -108,8 +108,10 @@ def decode_email(message: email.message.Message) -> email.message.Message: b'Content-Transfer-Encoding: base64', b'Content-Transfer-Encoding: 7bit' ) - - return email.message_from_bytes(decoded_bytes) + + # if i do message_from_bytes it bizarrely changes it back to base64? + # utf-8 has encoding issues so do latin1 + return email.message_from_string(decoded_bytes.decode("latin1")) def encrypt( message: email.message.Message, From 218ee51ac2c72457151571e5541488535e78c6b4 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:37:15 +0000 Subject: [PATCH 17/31] abstract b64 decoding behaviour to a function --- gpgmymail | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/gpgmymail b/gpgmymail index e7c6073..b2f539f 100755 --- a/gpgmymail +++ b/gpgmymail @@ -71,6 +71,29 @@ def decode_email(message: email.message.Message) -> email.message.Message: b'Content-Transfer-Encoding: 7bit' ) + def decode_b64_part( + part: email.message.Message, + decoded_bytes: bytes, + most_recent_boundary: str = None + ) -> bytes: + if part.get("Content-Transfer-Encoding") == "base64": + b64_str = part.get_payload() + # remove the boundary as we don't want to change this + b64_str = b64_str.replace(most_recent_boundary, "") + # sometimes we have leftover hyphens from a boundary, so strip: + # hyphens not in base64 so we know not to use them + # strip whitespace first + b64_str = b64_str.strip() + b64_str = b64_str.strip('-') + b64_str = b64_str.encode() # turn into bytes-like object + # this will also decode the boundary so there'll be some nonsese + # chars at end of email but it's nbd + decoded_b64_str = part.get_payload(decode=True) + return decoded_bytes.replace( + b64_str, + decoded_b64_str + ) + # REALLY hacky but i had issues with the more sensible ways to do this. # iterates through a Message object to find CTEs of base64 # gets the b64 payload and the decoded payload @@ -85,21 +108,11 @@ def decode_email(message: email.message.Message) -> email.message.Message: if part.is_multipart() and part.get_boundary(): most_recent_boundary = part.get_boundary() else: - if part.get("Content-Transfer-Encoding") == "base64": - b64_str = part.get_payload() - # remove the boundary as we don't want to change this - b64_str = b64_str.replace(most_recent_boundary, "") - # sometimes we have leftover hyphens from a boundary, so strip: - # hyphens not in base64 so we know not to use them - # strip whitespace first - b64_str = b64_str.strip() - b64_str = b64_str.strip('-') - b64_str = b64_str.encode() # turn into bytes-like object - decoded_b64_str = part.get_payload(decode=True) - decoded_bytes = decoded_bytes.replace( - b64_str, - decoded_b64_str - ) + decoded_bytes = decode_b64_part( + part, + decoded_bytes, + most_recent_boundary + ) else: # TODO pass From 633a54e2b1ed0cd1cf2a1f67dd8d4812ff1a11f9 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:39:17 +0000 Subject: [PATCH 18/31] decode b64 bytes for non-multipart message too --- gpgmymail | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/gpgmymail b/gpgmymail index b2f539f..4d78d91 100755 --- a/gpgmymail +++ b/gpgmymail @@ -71,6 +71,13 @@ def decode_email(message: email.message.Message) -> email.message.Message: b'Content-Transfer-Encoding: 7bit' ) + # REALLY hacky but i had issues with the more sensible ways to do this. + # iterates through a Message object to find CTEs of base64 + # gets the b64 payload and the decoded payload + # then find and replaces in decoded_bytes the b64 payload + # with the decoded payload + # lol + def decode_b64_part( part: email.message.Message, decoded_bytes: bytes, @@ -79,7 +86,8 @@ def decode_email(message: email.message.Message) -> email.message.Message: if part.get("Content-Transfer-Encoding") == "base64": b64_str = part.get_payload() # remove the boundary as we don't want to change this - b64_str = b64_str.replace(most_recent_boundary, "") + if most_recent_boundary: + b64_str = b64_str.replace(most_recent_boundary, "") # sometimes we have leftover hyphens from a boundary, so strip: # hyphens not in base64 so we know not to use them # strip whitespace first @@ -94,12 +102,6 @@ def decode_email(message: email.message.Message) -> email.message.Message: decoded_b64_str ) - # REALLY hacky but i had issues with the more sensible ways to do this. - # iterates through a Message object to find CTEs of base64 - # gets the b64 payload and the decoded payload - # then find and replaces in decoded_bytes the b64 payload - # with the decoded payload - # lol quopri_decoded_message = email.message_from_bytes(decoded_bytes) if quopri_decoded_message.is_multipart(): most_recent_boundary = None @@ -114,8 +116,11 @@ def decode_email(message: email.message.Message) -> email.message.Message: most_recent_boundary ) else: - # TODO - pass + decoded_bytes = decode_b64_part( + quopri_decoded_message, + decoded_bytes, + None + ) decoded_bytes = decoded_bytes.replace( b'Content-Transfer-Encoding: base64', From c148b94be1ddc672217181c1a9396b75e10055cc Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:47:59 +0000 Subject: [PATCH 19/31] decode_b64_part always returns something --- gpgmymail | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gpgmymail b/gpgmymail index 4d78d91..03132a1 100755 --- a/gpgmymail +++ b/gpgmymail @@ -101,6 +101,8 @@ def decode_email(message: email.message.Message) -> email.message.Message: b64_str, decoded_b64_str ) + + return decoded_bytes quopri_decoded_message = email.message_from_bytes(decoded_bytes) if quopri_decoded_message.is_multipart(): From 8fea3305376f550ba33a60494ed00a6dc6a19e7d Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 19:51:56 +0000 Subject: [PATCH 20/31] doc --- gpgmymail | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gpgmymail b/gpgmymail index 03132a1..3bf1705 100755 --- a/gpgmymail +++ b/gpgmymail @@ -83,6 +83,19 @@ def decode_email(message: email.message.Message) -> email.message.Message: decoded_bytes: bytes, most_recent_boundary: str = None ) -> bytes: + """ + change decoded_bytes such that part is decoded if base64 (unchanged if + not) + + see usage below for examples + + :param part: email.message.Message to be decoded + :param decoded_bytes: the email as a bytes object (ie not a string with + encoding), will have modified version returned + :param most_recent_boundary: str of the most recent boundary so we + don't overwrite this + :return: bytes of decoded_bytes with part decoded if base64 + """ if part.get("Content-Transfer-Encoding") == "base64": b64_str = part.get_payload() # remove the boundary as we don't want to change this From 272842162d99c2314a5ac5d1d8a20e71d7c3d067 Mon Sep 17 00:00:00 2001 From: revsuine Date: Thu, 14 Nov 2024 20:12:40 +0000 Subject: [PATCH 21/31] don't decode if decoding unneeded --- gpgmymail | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/gpgmymail b/gpgmymail index 3bf1705..c75b4f2 100755 --- a/gpgmymail +++ b/gpgmymail @@ -57,11 +57,27 @@ def decode_email(message: email.message.Message) -> email.message.Message: :param message: email.message.Message to be decoded :return: decoded email.message.Message""" + def decoded_bytes_to_return_value(decoded_bytes: bytes) -> email.message.Message: + """ + if at any point you want to return, return a call of this function and + pass decoded_bytes + + :param decoded_bytes: an email as an ASCII byte array; this should be + stored in decoded_bytes + :return: the expected return value of decode_email + """ + # if i do message_from_bytes it bizarrely changes it back to base64? + # utf-8 has encoding issues so do latin1 + return email.message_from_string(decoded_bytes.decode("latin1")) + # this is a kinda hacky way to do this by manipulating the message as a # string but i couldn't get it to work any other way - # decoding needed: - # as_string() gives us str, encode() gives us bytes decoded_bytes = message.as_bytes() + # if email doesn't need decoding + has_quopri = b'Content-Transfer-Encoding: quoted-printable' in decoded_bytes + has_base64 = b'Content-Transfer-Encoding: base64' in decoded_bytes + if not (has_quopri or has_base64): + return message decoded_bytes = quopri.decodestring(decoded_bytes) # replace any instances of the Content-Transfer-Encoding header @@ -71,6 +87,10 @@ def decode_email(message: email.message.Message) -> email.message.Message: b'Content-Transfer-Encoding: 7bit' ) + # now exit if there's no base64 as i think that's the most fucky + if not has_base64: + return decoded_bytes_to_return_value(decoded_bytes) + # REALLY hacky but i had issues with the more sensible ways to do this. # iterates through a Message object to find CTEs of base64 # gets the b64 payload and the decoded payload @@ -142,9 +162,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: b'Content-Transfer-Encoding: 7bit' ) - # if i do message_from_bytes it bizarrely changes it back to base64? - # utf-8 has encoding issues so do latin1 - return email.message_from_string(decoded_bytes.decode("latin1")) + return decoded_bytes_to_return_value(decoded_bytes) def encrypt( message: email.message.Message, From 4670bcdafd1ed6e0da0b1e16c44d0809bc78ef77 Mon Sep 17 00:00:00 2001 From: revsuine Date: Fri, 15 Nov 2024 14:36:54 +0000 Subject: [PATCH 22/31] wrap some things in try/except blocks --- gpgmymail | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/gpgmymail b/gpgmymail index c75b4f2..f9a5473 100755 --- a/gpgmymail +++ b/gpgmymail @@ -72,7 +72,12 @@ def decode_email(message: email.message.Message) -> email.message.Message: # this is a kinda hacky way to do this by manipulating the message as a # string but i couldn't get it to work any other way - decoded_bytes = message.as_bytes() + + # sometimes this raises an exception and idk why + try: + decoded_bytes = message.as_bytes() + except UnicodeEncodeError: + decoded_bytes = message.as_string().encode() # if email doesn't need decoding has_quopri = b'Content-Transfer-Encoding: quoted-printable' in decoded_bytes has_base64 = b'Content-Transfer-Encoding: base64' in decoded_bytes @@ -189,7 +194,12 @@ def encrypt( return message.as_string() # make necessary changes to message - message = decode_email(message) + # this function is quite clunky and seems to throw exceptions from time to + # time; if we can't make the necessary changes we want to just continue + try: + message = decode_email(message) + except: + pass gpg = gnupg.GPG() gpg.encoding = encoding From 0802cf5b3d0692b95454245dc886b3820aa4d0d2 Mon Sep 17 00:00:00 2001 From: revsuine Date: Fri, 15 Nov 2024 17:02:42 +0000 Subject: [PATCH 23/31] add --ignore-errors arg & skip encrypting base64 emails --- gpgmymail | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/gpgmymail b/gpgmymail index f9a5473..633dbb1 100755 --- a/gpgmymail +++ b/gpgmymail @@ -174,7 +174,8 @@ def encrypt( recipients: typing.List[str], *, unconditionally_encrypt: bool = False, - encoding: str = DEFAULT_ENCODING + encoding: str = DEFAULT_ENCODING, + ignore_errors: bool = False ) -> str: """Encrypt given message @@ -184,22 +185,39 @@ def encrypt( False (default), will NOT encrypt if any of the following conditions are met: - The message is already encrypted + - The message is encoded with base64 + :param encoding: string for encoding to use for the gnupg.GPG object + :param ignore_errors: bool, puts some parts of the function in + + try: + do_stuff() + except: + pass + + blocks. find ignore_errors to see instances where this occurs :return: The encrypted email as a string""" # exclusion criteria: # some mail clients like Thunderbird don't like twice-encrypted emails, # so we return the message as-is if it's already encrypted - if is_message_encrypted(message) and not unconditionally_encrypt: - return message.as_string() + if not unconditionally_encrypt: + if is_message_encrypted(message): + return message.as_string() + # bc i just have a bunch of issues w b64 + if "Content-Transfer-Encoding: base64" in message.as_string(): + return message.as_string() # make necessary changes to message # this function is quite clunky and seems to throw exceptions from time to # time; if we can't make the necessary changes we want to just continue try: message = decode_email(message) - except: - pass + except Exception as e: + if ignore_errors: + pass + else: + raise e gpg = gnupg.GPG() gpg.encoding = encoding @@ -257,6 +275,8 @@ def main() -> None: help="Encoding to use for the gnupg.GPG object") parser.add_argument('--unconditional', action="store_true", help="Encrypt mail unconditionally. By default, mail is not encrypted if it is already encrypted.") + parser.add_argument('--ignore-errors', action="store_true", + help="Ignore errors at certain error-prone points of the script.") parser.add_argument('recipient', nargs='*', help="Key ID or email of keys to encrypt for") args = parser.parse_args() @@ -269,7 +289,8 @@ def main() -> None: msg, args.recipient, unconditionally_encrypt=args.unconditional, - encoding=args.encoding + encoding=args.encoding, + ignore_errors=args.ignore_errors )) if __name__ == '__main__': From 182e1ceb43d5cc315a167c1d4bd8f483d35efb8c Mon Sep 17 00:00:00 2001 From: revsuine Date: Fri, 15 Nov 2024 17:40:22 +0000 Subject: [PATCH 24/31] conform to pEp standards --- gpgmymail | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gpgmymail b/gpgmymail index 633dbb1..c0acc7b 100755 --- a/gpgmymail +++ b/gpgmymail @@ -42,6 +42,7 @@ import gnupg # constants DEFAULT_ENCODING='utf-8' # default is latin-1 which fails w some unicode chars +PEP_SUBJECT='=?utf-8?Q?p=E2=89=A1p?=' def is_message_encrypted(message: email.message.Message) -> bool: """Determines whether or not an email message is encrypted. @@ -169,6 +170,27 @@ def decode_email(message: email.message.Message) -> email.message.Message: return decoded_bytes_to_return_value(decoded_bytes) +def set_email_header( + message: email.message.Message, + name: str, + value: str +) -> None: + """ + Set the header of an email Message. Will either replace the first instance + of the header, or if the header is not present, will add the header. + + Note: Python passes objects as references, so there is no need for a return + value. + + :param message: the Message object to be modified + :param name: the email header to set + :param value: the value to set the header to + """ + if message.get(name): + message.replace_header(name, value) + else: + message.add_header(name, value) + def encrypt( message: email.message.Message, recipients: typing.List[str], @@ -254,6 +276,10 @@ def encrypt( if key.lower() not in headers_not_to_override: encmsg[key] = value + # mark as confirming to pEp: https://blog.jak-linux.org/2019/06/13/encrypted-email-storage/#pretty-easy-privacy-pp + set_email_header(encmsg, 'Subject', PEP_SUBJECT) + set_email_header(encmsg, 'X-pEp-Version', '2.1') + return encmsg.as_string() def decrypt(message: email.message.Message, *, encoding: str = DEFAULT_ENCODING) -> str: From f5a31997cf36aea6d58e9d37dfea723ad2bc1f8f Mon Sep 17 00:00:00 2001 From: revsuine Date: Fri, 15 Nov 2024 17:55:22 +0000 Subject: [PATCH 25/31] add X-gpgmymail-Status header --- README.md | 11 +++++++++-- gpgmymail | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a8dfe15..0c893c5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,14 @@ # gpgmymail -Takes an email from stdin and encrypts it using the recipient's PGP key, -provided as an argument when calling the script. +Takes an email from stdin and encrypts it to stdout using the recipient's PGP +key, provided as an argument when calling the script. + +Leaves a `X-gpgmymail-Status` header on the email, which has the following +statuses: + +* `entered` - the email has entered the encryption function, but not been + encrypted +* `encrypted` - the encryption function has encrypted the email Written to be a Sieve filter to be used with `sieve_extprograms`. Can be used in a Sieve filter e.g.: diff --git a/gpgmymail b/gpgmymail index c0acc7b..8622548 100755 --- a/gpgmymail +++ b/gpgmymail @@ -24,6 +24,12 @@ works well for emails created with this tool. When encrypting, the tool preserves all headers in the original email in the encrypted part, and copies relevant headers to the output. When decrypting, any headers are ignored, and only the encrypted headers are restored. + +Emails exiting this script will have the 'X-gpgmymail-Status' header, which has +the following options: + +- entered: the email has entered the encrypt() function +- encrypted: the email has been encrypted """ import argparse @@ -219,6 +225,9 @@ def encrypt( blocks. find ignore_errors to see instances where this occurs :return: The encrypted email as a string""" + + # mark the email as having passed through us + set_email_header(message, 'X-gpgmymail-Status', 'entered') # exclusion criteria: # some mail clients like Thunderbird don't like twice-encrypted emails, @@ -280,6 +289,9 @@ def encrypt( set_email_header(encmsg, 'Subject', PEP_SUBJECT) set_email_header(encmsg, 'X-pEp-Version', '2.1') + # we have encrypted the email, set our gpgmymail header appropriately + set_email_header(encmsg, 'X-gpgmymail-Status', 'encrypted') + return encmsg.as_string() def decrypt(message: email.message.Message, *, encoding: str = DEFAULT_ENCODING) -> str: From 638f622f243e40db8938debec0773f75b00ec525 Mon Sep 17 00:00:00 2001 From: revsuine Date: Sat, 16 Nov 2024 15:44:27 +0000 Subject: [PATCH 26/31] don't decode base64 non-text parts --- gpgmymail | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/gpgmymail b/gpgmymail index 8622548..92fded4 100755 --- a/gpgmymail +++ b/gpgmymail @@ -87,7 +87,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: decoded_bytes = message.as_string().encode() # if email doesn't need decoding has_quopri = b'Content-Transfer-Encoding: quoted-printable' in decoded_bytes - has_base64 = b'Content-Transfer-Encoding: base64' in decoded_bytes + has_base64 = is_base64_text(message) # we don't want to decode non-text if not (has_quopri or has_base64): return message decoded_bytes = quopri.decodestring(decoded_bytes) @@ -117,7 +117,7 @@ def decode_email(message: email.message.Message) -> email.message.Message: ) -> bytes: """ change decoded_bytes such that part is decoded if base64 (unchanged if - not) + not) and text (so will not decode base64 images etc) see usage below for examples @@ -128,7 +128,8 @@ def decode_email(message: email.message.Message) -> email.message.Message: don't overwrite this :return: bytes of decoded_bytes with part decoded if base64 """ - if part.get("Content-Transfer-Encoding") == "base64": + if part.get("Content-Transfer-Encoding") == "base64" and \ + part.get_content_maintype() == "text": b64_str = part.get_payload() # remove the boundary as we don't want to change this if most_recent_boundary: @@ -197,6 +198,25 @@ def set_email_header( else: message.add_header(name, value) +def is_base64_text(message: email.message.Message) -> bool: + """ + Return whether or not there is base64-encoded text in a multipart + message, i.e. will be False if it's only e.g. images that are encoded as + base64. + + :param message: the email Message to check + :return: True if there is a text part that's base64 encoded, False if not + """ + if message.is_multipart(): + for part in message.walk(): + if part.get('Content-Transfer-Encoding') == "base64" and \ + part.get_content_maintype() == "text": + return True + return False + else: + return message.get('Content-Transfer-Encoding') == "base64" and \ + message.get_content_maintype() == "text" + def encrypt( message: email.message.Message, recipients: typing.List[str], @@ -236,7 +256,7 @@ def encrypt( if is_message_encrypted(message): return message.as_string() # bc i just have a bunch of issues w b64 - if "Content-Transfer-Encoding: base64" in message.as_string(): + if is_base64_text(message): return message.as_string() # make necessary changes to message From 3ca2cca4697e13674a646e7bf92f5e2632167de9 Mon Sep 17 00:00:00 2001 From: revsuine Date: Sat, 16 Nov 2024 15:52:32 +0000 Subject: [PATCH 27/31] update README to better describe my changes --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0c893c5..fa11a05 100644 --- a/README.md +++ b/README.md @@ -40,5 +40,6 @@ behaviour which is much better achieved with Sieve will be implemented, e.g. # Credits * Julian Klode for the [original code](https://github.com/julian-klode/ansible.jak-linux.org/blob/dovecot/roles/mailserver/files/usr/local/lib/dovecot-sieve-filters/gpgmymail) -* revsuine for modifications to gpgmymail +* revsuine for modifications to gpgmymail, mostly to make it work well with + Thunderbird From 224de12f0980e09805f0a3f1191078c81672e3c4 Mon Sep 17 00:00:00 2001 From: revsuine Date: Sat, 16 Nov 2024 19:00:00 +0000 Subject: [PATCH 28/31] no longer change subject line or add X-pEp-Version header --- gpgmymail | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gpgmymail b/gpgmymail index 92fded4..ad5fe67 100755 --- a/gpgmymail +++ b/gpgmymail @@ -305,10 +305,6 @@ def encrypt( if key.lower() not in headers_not_to_override: encmsg[key] = value - # mark as confirming to pEp: https://blog.jak-linux.org/2019/06/13/encrypted-email-storage/#pretty-easy-privacy-pp - set_email_header(encmsg, 'Subject', PEP_SUBJECT) - set_email_header(encmsg, 'X-pEp-Version', '2.1') - # we have encrypted the email, set our gpgmymail header appropriately set_email_header(encmsg, 'X-gpgmymail-Status', 'encrypted') From f5a0d3fddec589cc7ac96fa1252373b7fc174ecb Mon Sep 17 00:00:00 2001 From: revsuine Date: Sun, 17 Nov 2024 23:39:48 +0000 Subject: [PATCH 29/31] make decoding emails optional --- gpgmymail | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/gpgmymail b/gpgmymail index ad5fe67..f9b4308 100755 --- a/gpgmymail +++ b/gpgmymail @@ -223,7 +223,8 @@ def encrypt( *, unconditionally_encrypt: bool = False, encoding: str = DEFAULT_ENCODING, - ignore_errors: bool = False + ignore_errors: bool = False, + decode_before_encrypting: bool = False ) -> str: """Encrypt given message @@ -262,13 +263,14 @@ def encrypt( # make necessary changes to message # this function is quite clunky and seems to throw exceptions from time to # time; if we can't make the necessary changes we want to just continue - try: - message = decode_email(message) - except Exception as e: - if ignore_errors: - pass - else: - raise e + if decode_before_encrypting: + try: + message = decode_email(message) + except Exception as e: + if ignore_errors: + pass + else: + raise e gpg = gnupg.GPG() gpg.encoding = encoding @@ -331,6 +333,8 @@ def main() -> None: help="Encrypt mail unconditionally. By default, mail is not encrypted if it is already encrypted.") parser.add_argument('--ignore-errors', action="store_true", help="Ignore errors at certain error-prone points of the script.") + parser.add_argument('--decode', action="store_true", + "Attempt to decode quoted-printable and base64 parts to latin-1 before encrypting a message") parser.add_argument('recipient', nargs='*', help="Key ID or email of keys to encrypt for") args = parser.parse_args() @@ -344,7 +348,8 @@ def main() -> None: args.recipient, unconditionally_encrypt=args.unconditional, encoding=args.encoding, - ignore_errors=args.ignore_errors + ignore_errors=args.ignore_errors, + decode_before_encrypting=args.decode )) if __name__ == '__main__': From e0c200c95e212476a20a3458bc62b32f52f7220b Mon Sep 17 00:00:00 2001 From: revsuine Date: Mon, 18 Nov 2024 15:16:19 +0000 Subject: [PATCH 30/31] syntax error fix --- gpgmymail | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpgmymail b/gpgmymail index f9b4308..330adc1 100755 --- a/gpgmymail +++ b/gpgmymail @@ -334,7 +334,7 @@ def main() -> None: parser.add_argument('--ignore-errors', action="store_true", help="Ignore errors at certain error-prone points of the script.") parser.add_argument('--decode', action="store_true", - "Attempt to decode quoted-printable and base64 parts to latin-1 before encrypting a message") + help="Attempt to decode quoted-printable and base64 parts to latin-1 before encrypting a message") parser.add_argument('recipient', nargs='*', help="Key ID or email of keys to encrypt for") args = parser.parse_args() From f7bb04e5ecf404f13a610e97809e9fc2b1a2f55d Mon Sep 17 00:00:00 2001 From: revsuine Date: Sat, 23 Nov 2024 02:54:50 +0000 Subject: [PATCH 31/31] gpgmymail: remove all 'decoding' code --- gpgmymail | 170 ------------------------------------------------------ 1 file changed, 170 deletions(-) diff --git a/gpgmymail b/gpgmymail index 330adc1..b42e891 100755 --- a/gpgmymail +++ b/gpgmymail @@ -40,15 +40,12 @@ import email.mime.application import email.mime.multipart import email.mime.message import typing -# for decode_email: -import quopri # see: https://gnupg.readthedocs.io/en/latest/ import gnupg # constants DEFAULT_ENCODING='utf-8' # default is latin-1 which fails w some unicode chars -PEP_SUBJECT='=?utf-8?Q?p=E2=89=A1p?=' def is_message_encrypted(message: email.message.Message) -> bool: """Determines whether or not an email message is encrypted. @@ -58,125 +55,6 @@ def is_message_encrypted(message: email.message.Message) -> bool: return message.get_content_subtype() == "encrypted" -def decode_email(message: email.message.Message) -> email.message.Message: - """Turn a quoted-printable or base64 encoded email into a 7or8bit encoded - email - - :param message: email.message.Message to be decoded - :return: decoded email.message.Message""" - def decoded_bytes_to_return_value(decoded_bytes: bytes) -> email.message.Message: - """ - if at any point you want to return, return a call of this function and - pass decoded_bytes - - :param decoded_bytes: an email as an ASCII byte array; this should be - stored in decoded_bytes - :return: the expected return value of decode_email - """ - # if i do message_from_bytes it bizarrely changes it back to base64? - # utf-8 has encoding issues so do latin1 - return email.message_from_string(decoded_bytes.decode("latin1")) - - # this is a kinda hacky way to do this by manipulating the message as a - # string but i couldn't get it to work any other way - - # sometimes this raises an exception and idk why - try: - decoded_bytes = message.as_bytes() - except UnicodeEncodeError: - decoded_bytes = message.as_string().encode() - # if email doesn't need decoding - has_quopri = b'Content-Transfer-Encoding: quoted-printable' in decoded_bytes - has_base64 = is_base64_text(message) # we don't want to decode non-text - if not (has_quopri or has_base64): - return message - decoded_bytes = quopri.decodestring(decoded_bytes) - - # replace any instances of the Content-Transfer-Encoding header - # quopri version, we do base64 version down there - decoded_bytes = decoded_bytes.replace( - b'Content-Transfer-Encoding: quoted-printable', - b'Content-Transfer-Encoding: 7bit' - ) - - # now exit if there's no base64 as i think that's the most fucky - if not has_base64: - return decoded_bytes_to_return_value(decoded_bytes) - - # REALLY hacky but i had issues with the more sensible ways to do this. - # iterates through a Message object to find CTEs of base64 - # gets the b64 payload and the decoded payload - # then find and replaces in decoded_bytes the b64 payload - # with the decoded payload - # lol - - def decode_b64_part( - part: email.message.Message, - decoded_bytes: bytes, - most_recent_boundary: str = None - ) -> bytes: - """ - change decoded_bytes such that part is decoded if base64 (unchanged if - not) and text (so will not decode base64 images etc) - - see usage below for examples - - :param part: email.message.Message to be decoded - :param decoded_bytes: the email as a bytes object (ie not a string with - encoding), will have modified version returned - :param most_recent_boundary: str of the most recent boundary so we - don't overwrite this - :return: bytes of decoded_bytes with part decoded if base64 - """ - if part.get("Content-Transfer-Encoding") == "base64" and \ - part.get_content_maintype() == "text": - b64_str = part.get_payload() - # remove the boundary as we don't want to change this - if most_recent_boundary: - b64_str = b64_str.replace(most_recent_boundary, "") - # sometimes we have leftover hyphens from a boundary, so strip: - # hyphens not in base64 so we know not to use them - # strip whitespace first - b64_str = b64_str.strip() - b64_str = b64_str.strip('-') - b64_str = b64_str.encode() # turn into bytes-like object - # this will also decode the boundary so there'll be some nonsese - # chars at end of email but it's nbd - decoded_b64_str = part.get_payload(decode=True) - return decoded_bytes.replace( - b64_str, - decoded_b64_str - ) - - return decoded_bytes - - quopri_decoded_message = email.message_from_bytes(decoded_bytes) - if quopri_decoded_message.is_multipart(): - most_recent_boundary = None - for part in quopri_decoded_message.walk(): - # multipart and has boundary (not None) - if part.is_multipart() and part.get_boundary(): - most_recent_boundary = part.get_boundary() - else: - decoded_bytes = decode_b64_part( - part, - decoded_bytes, - most_recent_boundary - ) - else: - decoded_bytes = decode_b64_part( - quopri_decoded_message, - decoded_bytes, - None - ) - - decoded_bytes = decoded_bytes.replace( - b'Content-Transfer-Encoding: base64', - b'Content-Transfer-Encoding: 7bit' - ) - - return decoded_bytes_to_return_value(decoded_bytes) - def set_email_header( message: email.message.Message, name: str, @@ -198,25 +76,6 @@ def set_email_header( else: message.add_header(name, value) -def is_base64_text(message: email.message.Message) -> bool: - """ - Return whether or not there is base64-encoded text in a multipart - message, i.e. will be False if it's only e.g. images that are encoded as - base64. - - :param message: the email Message to check - :return: True if there is a text part that's base64 encoded, False if not - """ - if message.is_multipart(): - for part in message.walk(): - if part.get('Content-Transfer-Encoding') == "base64" and \ - part.get_content_maintype() == "text": - return True - return False - else: - return message.get('Content-Transfer-Encoding') == "base64" and \ - message.get_content_maintype() == "text" - def encrypt( message: email.message.Message, recipients: typing.List[str], @@ -224,7 +83,6 @@ def encrypt( unconditionally_encrypt: bool = False, encoding: str = DEFAULT_ENCODING, ignore_errors: bool = False, - decode_before_encrypting: bool = False ) -> str: """Encrypt given message @@ -234,17 +92,7 @@ def encrypt( False (default), will NOT encrypt if any of the following conditions are met: - The message is already encrypted - - The message is encoded with base64 :param encoding: string for encoding to use for the gnupg.GPG object - :param ignore_errors: bool, puts some parts of the function in - - try: - do_stuff() - except: - pass - - blocks. find ignore_errors to see instances where this occurs - :return: The encrypted email as a string""" # mark the email as having passed through us @@ -256,22 +104,7 @@ def encrypt( if not unconditionally_encrypt: if is_message_encrypted(message): return message.as_string() - # bc i just have a bunch of issues w b64 - if is_base64_text(message): - return message.as_string() - # make necessary changes to message - # this function is quite clunky and seems to throw exceptions from time to - # time; if we can't make the necessary changes we want to just continue - if decode_before_encrypting: - try: - message = decode_email(message) - except Exception as e: - if ignore_errors: - pass - else: - raise e - gpg = gnupg.GPG() gpg.encoding = encoding encrypted_content = gpg.encrypt(message.as_string(), recipients, armor=True) @@ -333,8 +166,6 @@ def main() -> None: help="Encrypt mail unconditionally. By default, mail is not encrypted if it is already encrypted.") parser.add_argument('--ignore-errors', action="store_true", help="Ignore errors at certain error-prone points of the script.") - parser.add_argument('--decode', action="store_true", - help="Attempt to decode quoted-printable and base64 parts to latin-1 before encrypting a message") parser.add_argument('recipient', nargs='*', help="Key ID or email of keys to encrypt for") args = parser.parse_args() @@ -349,7 +180,6 @@ def main() -> None: unconditionally_encrypt=args.unconditional, encoding=args.encoding, ignore_errors=args.ignore_errors, - decode_before_encrypting=args.decode )) if __name__ == '__main__':