Added some extra documentation
This commit is contained in:
36
getmail.py
36
getmail.py
@@ -8,12 +8,11 @@ It will find any emails with attachments, marking them as read, and saving
|
||||
the attachment to the 'attachments' directory for later processing.
|
||||
It will respect the attachment filetype and extension.
|
||||
Attachments are output named with the current date-time, and a semi-random
|
||||
uid designed to crudely prevent namespace collisions. In a VERY high traffic
|
||||
environment where many files may be created per second, this should be
|
||||
re-implemented to be more robust.
|
||||
uid based on the file hash in order to prevent namespace collisions.
|
||||
|
||||
No file locking is used, however files are written to a temporary directory
|
||||
first and renamed upon completion, as renaming is an atomic operation at an
|
||||
OS level.
|
||||
first, flushed, and renamed upon completion, as renaming is an atomic operation
|
||||
at an OS level.
|
||||
|
||||
Attachments will not be created if they have an identical hash to a
|
||||
previously downloaded attachment. This is designed to prevent scenarios where
|
||||
@@ -21,6 +20,15 @@ the same file has been accidentally sent multiple times. Note that this
|
||||
identification is done based on file content, and the name of the file is
|
||||
irrelevant.
|
||||
|
||||
Once an attachment is saved, we look up the mapping of fields in the attachment
|
||||
for input, using a custom mapping based on the domain of the email address of
|
||||
the sender. Different companies may use differing CSV formats, but we work on
|
||||
the assumption that the XML for input into Logistic will need to be the same and
|
||||
based on a well defined DTD.
|
||||
|
||||
After mapping and conversion to XML, the file is validated against the DTD and
|
||||
written to the xml directory for injection into Logistic.
|
||||
|
||||
Logging is fairly primitive and done to a log file in the same directory as
|
||||
the script. This could be upgraded to syslog-style logging if required.
|
||||
|
||||
@@ -29,7 +37,8 @@ STARTTLS. If manual STARTTLS is required, the MailBox method will need to be
|
||||
altered to MailBoxTls. If Outlook or Gmail or similar are used, it will be
|
||||
necessary to implement OAUTH2.
|
||||
|
||||
Authentication is configured in a .env file as described below in the code.
|
||||
Authentication and other user configuration are set in a .env file as
|
||||
described below in the code.
|
||||
"""
|
||||
|
||||
# Standard libraries
|
||||
@@ -38,7 +47,6 @@ import sys
|
||||
import csv
|
||||
import ssl
|
||||
import json
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import tempfile
|
||||
@@ -91,7 +99,7 @@ def save_hashes(hashes_file, hashes):
|
||||
json.dump(list(hashes), f)
|
||||
|
||||
def setup_ssl_context():
|
||||
"""Set up the SSL context."""
|
||||
"""Set up the SSL context for mail connection."""
|
||||
ssl_context = ssl.create_default_context()
|
||||
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_3
|
||||
ssl_context.maximum_version = ssl.TLSVersion.TLSv1_3
|
||||
@@ -140,6 +148,8 @@ def csv_to_xml(csv_file, xml_file, dtd_file, csv_mapping):
|
||||
logging.info("XML file saved to %s", xml_file)
|
||||
|
||||
def get_client_mapping(client_domain):
|
||||
"""Check custom mapping for the client CSV files, returning the custom
|
||||
mapping if one is found."""
|
||||
with open("column_mapping.json", "r", encoding="utf-8") as f:
|
||||
column_mapping = json.load(f)
|
||||
mapping = column_mapping.get(client_domain, column_mapping["default"])
|
||||
@@ -147,7 +157,9 @@ def get_client_mapping(client_domain):
|
||||
|
||||
def process_attachments(mailbox, attachment_path, saved_hashes, xml_docs,
|
||||
dtd_name):
|
||||
"""Process and save email attachments."""
|
||||
"""Process and save email attachments.
|
||||
For each attachment found, save it, marking the email as read, and saving
|
||||
a unique hash so we never process the same attachment again."""
|
||||
for msg in mailbox.fetch(AND(seen=False), mark_seen=False):
|
||||
for att in msg.attachments:
|
||||
attachment_hash = compute_hash(att.payload)
|
||||
@@ -156,7 +168,7 @@ def process_attachments(mailbox, attachment_path, saved_hashes, xml_docs,
|
||||
filename, file_ext = os.path.splitext(att.filename)
|
||||
logging.info('new attachment named %s from %s',
|
||||
filename, msg.from_)
|
||||
client_domain = msg.from_.split('@')[1]
|
||||
client_domain = msg.from_.split('@', maxsplit=1)[1]
|
||||
csv_mapping=get_client_mapping(client_domain)
|
||||
logging.debug('loaded client %s csv mapping %s',
|
||||
client_domain, csv_mapping)
|
||||
@@ -166,7 +178,7 @@ def process_attachments(mailbox, attachment_path, saved_hashes, xml_docs,
|
||||
f"{attachment_hash[:8]}"
|
||||
f"{file_ext}"
|
||||
)
|
||||
xml_name = final_name.split('.')[0] + '.xml'
|
||||
xml_name = final_name.split('.', maxsplit=1)[0] + '.xml'
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False, dir="temp") as temp_file:
|
||||
@@ -186,7 +198,7 @@ def process_attachments(mailbox, attachment_path, saved_hashes, xml_docs,
|
||||
mailbox.flag(msg.uid, '\\Seen', True)
|
||||
|
||||
def main():
|
||||
"""Code entrypoint."""
|
||||
"""Initialise logging, load environment, and get to work!"""
|
||||
setup_logging()
|
||||
load_environment_variables()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user