7or8bit-decode #5

Manually merged
revsuine merged 17 commits from 7or8bit-decode into master 2024-11-14 19:45:52 +00:00
2 changed files with 92 additions and 1 deletions

7
.gitignore vendored
View file

@ -1,4 +1,9 @@
venv/ venv/
__pycache__/
.idea/ .idea/
testing.py # misc testing scripts
testing*.py
# i have a symlink here so testing scripts can import gpgmymail
# as python expects `import gpgmymail` to be importing `gpgmymail.py`
gpgmymail.py

View file

@ -34,6 +34,8 @@ import email.mime.application
import email.mime.multipart import email.mime.multipart
import email.mime.message import email.mime.message
import typing import typing
# for decode_email:
import quopri
# see: https://gnupg.readthedocs.io/en/latest/ # see: https://gnupg.readthedocs.io/en/latest/
import gnupg import gnupg
@ -49,6 +51,86 @@ def is_message_encrypted(message: email.message.Message) -> bool:
return message.get_content_subtype() == "encrypted" return message.get_content_subtype() == "encrypted"
def decode_email(message: email.message.Message) -> email.message.Message:
    """Turn a quoted-printable or base64 encoded email into a 7or8bit encoded
    email

    Every non-multipart part whose Content-Transfer-Encoding is
    quoted-printable or base64 (compared case-insensitively) has its body
    decoded, and its Content-Transfer-Encoding header rewritten to ``7bit``
    when the decoded body is pure ASCII or ``8bit`` when it contains high
    bytes. Parts with any other encoding, all other headers, and the
    multipart structure are left as they are. The message passed in is not
    modified.

    The returned message is parsed from a latin1-decoded string, so each
    character of a body stands for exactly one raw byte; encode a payload
    with latin1 to get the original bytes back.

    :param message: email.message.Message to be decoded
    :return: decoded email.message.Message"""
    # work on a re-parsed copy so the caller's message is never mutated
    working_copy = email.message_from_bytes(message.as_bytes())

    for part in working_copy.walk():
        # containers (multipart/*, message/rfc822) have no body of their
        # own; walk() yields their children separately
        if part.is_multipart():
            continue
        # normalise exactly like get_payload(decode=True) does, so we only
        # relabel parts that it will really decode
        cte = str(part.get("Content-Transfer-Encoding", "")).lower()
        if cte not in ("quoted-printable", "base64"):
            continue
        raw_body = part.get_payload(decode=True)
        if raw_body is None:
            continue
        # surrogateescape keeps every byte intact inside a str payload, and
        # as_bytes() below writes those bytes back out unchanged
        part.set_payload(raw_body.decode("ascii", "surrogateescape"))
        is_ascii = max(raw_body, default=0) < 0x80
        part.replace_header(
            "Content-Transfer-Encoding",
            "7bit" if is_ascii else "8bit"
        )

    # returning the bytes-parsed message directly would make a later
    # as_string() re-encode 8-bit text parts (e.g. back to base64 for
    # utf-8). parsing from a latin1 str gives payloads without surrogate
    # escapes, which as_string() writes out as-is
    return email.message_from_string(working_copy.as_bytes().decode("latin1"))
def encrypt( def encrypt(
message: email.message.Message, message: email.message.Message,
recipients: typing.List[str], recipients: typing.List[str],
@ -67,11 +149,15 @@ def encrypt(
:return: The encrypted email as a string""" :return: The encrypted email as a string"""
# exclusion criteria:
# some mail clients like Thunderbird don't like twice-encrypted emails, # some mail clients like Thunderbird don't like twice-encrypted emails,
# so we return the message as-is if it's already encrypted # so we return the message as-is if it's already encrypted
if is_message_encrypted(message) and not unconditionally_encrypt: if is_message_encrypted(message) and not unconditionally_encrypt:
return message.as_string() return message.as_string()
# make necessary changes to message
message = decode_email(message)
gpg = gnupg.GPG() gpg = gnupg.GPG()
gpg.encoding = encoding gpg.encoding = encoding
encrypted_content = gpg.encrypt(message.as_string(), recipients, armor=True) encrypted_content = gpg.encrypt(message.as_string(), recipients, armor=True)