Merge branch '7or8bit-decode'

decode b64 bytes for non-multipart message too
abstract b64 decoding behaviour to a function
2024-11-14 19:45:11 +00:00 · 2024-11-14 19:39:17 +00:00 · 2024-11-14 19:37:15 +00:00 · 2024-11-14 19:24:40 +00:00 · 2024-11-14 19:15:37 +00:00 · 2024-11-14 19:05:21 +00:00
2 changed files with 92 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,9 @@
 venv/
+__pycache__/
 .idea/
-testing.py
+# misc testing scripts
+testing*.py
+# i have a symlink here so testing scripts can import gpgmymail
+# as python expects `import gpgmymail` to be importing `gpgmymail.py`
+gpgmymail.py

--- a/86
+++ b/86
@ -34,6 +34,8 @@ import email.mime.application
 import email.mime.multipart
 import email.mime.message
 import typing
+# for decode_email:
+import quopri

 # see: https://gnupg.readthedocs.io/en/latest/
 import gnupg
@ -49,6 +51,86 @@ def is_message_encrypted(message: email.message.Message) -> bool:

    return message.get_content_subtype() == "encrypted"

+def decode_email(message: email.message.Message) -> email.message.Message:
+    """Turn a quoted-printable or base64 encoded email into a 7or8bit encoded
+    email
+
+    Every non-multipart part whose Content-Transfer-Encoding is base64 or
+    quoted-printable gets its body replaced by the decoded bytes, and that
+    header rewritten to 7bit (decoded body is pure ASCII) or 8bit (decoded
+    body has bytes above 0x7F). Headers, boundaries and parts using any other
+    transfer encoding are passed through as-is. The message passed in is not
+    modified.
+
+    :param message: email.message.Message to be decoded
+    :return: decoded email.message.Message, parsed from the latin1 text of the
+        rewritten message so that every byte maps to exactly one character"""
+    # this used to quopri-decode the whole serialised message and then swap
+    # the b64 payloads in with bytes.replace. that had two problems:
+    # - `=XX` sequences in headers, or in parts that were never
+    #   quoted-printable (e.g. `?id=42` in a 7bit body), got decoded too
+    # - the b64 helper returned None for any part that wasn't base64, so
+    #   decoded_bytes became None and the next .replace() raised
+    #   AttributeError
+    # so now each part is decoded on its own through the email API instead
+
+    def decode_part(part: email.message.Message) -> None:
+        """Decode a single leaf part in place
+
+        :param part: non-multipart email.message.Message; left untouched
+            unless its Content-Transfer-Encoding is base64 or
+            quoted-printable"""
+        # the header value may be a Header object, so stringify it first.
+        # anything that isn't exactly base64/quoted-printable is skipped
+        cte = str(part.get("Content-Transfer-Encoding", "")).lower()
+        if cte not in ("base64", "quoted-printable"):
+            return
+
+        payload = part.get_payload(decode=True)
+        if payload is None:
+            return
+
+        try:
+            text = payload.decode("ascii")
+            new_cte = "7bit"
+        except UnicodeDecodeError:
+            # surrogateescape keeps the non-ASCII bytes recoverable, so
+            # as_bytes() below can write them back out as the raw bytes
+            text = payload.decode("ascii", "surrogateescape")
+            new_cte = "8bit"
+
+        # no charset argument: passing one makes set_payload re-encode the
+        # body, which is exactly what we are undoing here
+        part.set_payload(text)
+        part.replace_header("Content-Transfer-Encoding", new_cte)
+
+    # work on a freshly parsed copy so the caller's message is left alone
+    decoded_message = email.message_from_bytes(message.as_bytes())
+
+    # walk() yields the message itself and then every nested part, so this
+    # covers non-multipart emails too
+    for part in decoded_message.walk():
+        # containers (multipart/*, message/rfc822) have no body of their own
+        if not part.is_multipart():
+            decode_part(part)
+
+    # NOTE: limitations this shares with the old implementation:
+    # - binary attachments are decoded as well. serialising normalises line
+    #   endings, so \r and \r\n bytes inside them come out as \n
+    # - a decoded body that happens to contain a boundary line will confuse
+    #   whoever parses the result
+    # - parts under multipart/signed are rewritten too, which invalidates
+    #   the signature
+
+    # back to raw bytes now that the parts have been rewritten
+    decoded_bytes = decoded_message.as_bytes()
+
+    # message_from_bytes would store 8bit bodies surrogate-escaped, and
+    # as_string() on such a message re-encodes them according to their
+    # charset (e.g. utf-8 goes back to base64), so parse from a string.
+    # utf-8 has encoding issues so do latin1
+    return email.message_from_string(decoded_bytes.decode("latin1"))
+
 def encrypt(
        message: email.message.Message,
        recipients: typing.List[str],
@ -67,10 +149,14 @@ def encrypt(

    :return: The encrypted email as a string"""
    
+    # exclusion criteria:
    # some mail clients like Thunderbird don't like twice-encrypted emails,
    # so we return the message as-is if it's already encrypted
    if is_message_encrypted(message) and not unconditionally_encrypt:
        return message.as_string()
+
+    # make necessary changes to message
+    message = decode_email(message)
    
    gpg = gnupg.GPG()
    gpg.encoding = encoding
Author	SHA1	Message	Date
revsuine	ec1fda254a	Merge branch '7or8bit-decode'	2024-11-14 19:45:11 +00:00
revsuine	633a54e2b1	decode b64 bytes for non-multipart message too	2024-11-14 19:39:17 +00:00
revsuine	218ee51ac2	abstract b64 decoding behaviour to a function	2024-11-14 19:37:15 +00:00
revsuine	f1a07cb1e0	use message_from_string instead of message_from_bytes cause message_from_bytes bizarrely changes it back to base64	2024-11-14 19:24:40 +00:00
revsuine	f559fef2ed	strip trailing hyphens from base64	2024-11-14 19:15:37 +00:00
revsuine	10f25158bf	try to decode w/o boundary	2024-11-14 19:05:21 +00:00
revsuine	6590baafd1	try do this by replacing entire payload	2024-11-14 18:55:06 +00:00
revsuine	b213a0ab52	don't encode decoded	2024-11-14 18:52:18 +00:00
revsuine	9a3a1f2cb7	encode the find and replaces for b64 decode	2024-11-14 18:46:30 +00:00
revsuine	158f5356a1	syntax error fix	2024-11-14 18:45:30 +00:00
revsuine	82c5144e58	implement really hacky way of decoding b64 parts	2024-11-14 18:44:28 +00:00
revsuine	a21ee759c8	temp remove base64 decoding and unconditionally quopri decode	2024-11-14 18:26:22 +00:00
revsuine	2955e990da	var name typo	2024-11-14 18:15:06 +00:00
revsuine	e7a8f40b91	first attempt to implement decoding as 7or8bit through quopri, base64, and byte replacement	2024-11-14 18:13:16 +00:00
revsuine	8755e0e1cc	gitignore some testing files	2024-11-14 17:39:25 +00:00
revsuine	b78f1f04af	more testing scripts	2024-11-14 15:59:20 +00:00
revsuine	f895e87ec0	account for non-multipart emails	2024-11-14 15:19:57 +00:00
revsuine	ed57c8f0d4	first attempt to implement decoding as 7or8bit (non-working)	2024-11-13 17:22:21 +00:00