7or8bit-decode #5

Manually merged
revsuine merged 17 commits from 7or8bit-decode into master 2024-11-14 19:45:52 +00:00
Showing only changes of commit 218ee51ac2 - Show all commits

View file

@ -71,6 +71,29 @@ def decode_email(message: email.message.Message) -> email.message.Message:
b'Content-Transfer-Encoding: 7bit'
)
def decode_b64_part(
        part: email.message.Message,
        decoded_bytes: bytes,
        most_recent_boundary: str = None
) -> bytes:
    """Replace one part's base64 text with its decoded bytes.

    The base64 payload of ``part`` is looked up verbatim inside
    ``decoded_bytes`` and every occurrence is swapped for the decoded
    payload.

    Args:
        part: The message part to inspect.
        decoded_bytes: The serialised email that is being rewritten.
        most_recent_boundary: Boundary string to remove from the payload
            before searching, so the boundary itself is not rewritten.
            ``None`` (the default) or an empty string skips this step.

    Returns:
        ``decoded_bytes`` with the part's base64 text replaced, or
        ``decoded_bytes`` unchanged when the part is not base64-encoded,
        has no textual payload, or has nothing left to search for once
        the boundary residue has been stripped.
    """
    # Content-Transfer-Encoding values are case-insensitive, and
    # get_payload(decode=True) lowercases the header too, so compare the
    # same way to stay in step with it.
    cte = str(part.get("Content-Transfer-Encoding", "")).lower()
    if cte != "base64":
        return decoded_bytes

    b64_str = part.get_payload()
    if not isinstance(b64_str, str):
        # no payload at all, or a list of sub-parts: nothing to look up
        return decoded_bytes

    # remove the boundary as we don't want to change this
    if most_recent_boundary:
        b64_str = b64_str.replace(most_recent_boundary, "")

    # sometimes we have leftover hyphens from a boundary, so strip:
    # hyphens are not in the base64 alphabet so we know not to use them.
    # strip whitespace first
    b64_bytes = b64_str.strip().strip('-').encode()  # bytes-like needle
    if not b64_bytes:
        # bytes.replace(b"", x) would insert x between every single byte
        return decoded_bytes

    # this will also decode the boundary so there'll be some nonsense
    # chars at end of email but it's nbd
    decoded_b64 = part.get_payload(decode=True)
    return decoded_bytes.replace(b64_bytes, decoded_b64)
# REALLY hacky but i had issues with the more sensible ways to do this.
# iterates through a Message object to find CTEs of base64
# gets the b64 payload and the decoded payload
@ -85,20 +108,10 @@ def decode_email(message: email.message.Message) -> email.message.Message:
if part.is_multipart() and part.get_boundary():
most_recent_boundary = part.get_boundary()
else:
if part.get("Content-Transfer-Encoding") == "base64":
b64_str = part.get_payload()
# remove the boundary as we don't want to change this
b64_str = b64_str.replace(most_recent_boundary, "")
# sometimes we have leftover hyphens from a boundary, so strip:
# hyphens not in base64 so we know not to use them
# strip whitespace first
b64_str = b64_str.strip()
b64_str = b64_str.strip('-')
b64_str = b64_str.encode() # turn into bytes-like object
decoded_b64_str = part.get_payload(decode=True)
decoded_bytes = decoded_bytes.replace(
b64_str,
decoded_b64_str
decoded_bytes = decode_b64_part(
part,
decoded_bytes,
most_recent_boundary
)
else:
# TODO