#!/usr/bin/env python3
"""
Convert a CSV file of items to an XML document and validate it against a DTD.

Developed by Greig McGill of Sense7
"""

import os
import sys
import csv
import logging
from io import BytesIO

import lxml.etree as ET

# CSV columns that are copied, in this order, into same-named child
# elements of every <item>.
_ITEM_FIELDS = (
    "Item_ID",
    "Item_Name",
    "Item_Description",
    "Item_Price",
    "Item_Quantity",
)


def setup_logging():
    """Configure root logging to append DEBUG-level output to ./csv_to_xml.log.

    Also writes a start-up entry recording the current working directory.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s %(message)s',
        filename='./csv_to_xml.log',
        filemode='a')
    logging.debug('%s started in %s', 'csv_to_xml.py', os.getcwd())


def validate_xml(xml_string, dtd_path):
    """Validate an XML document against a DTD and log the outcome.

    Args:
        xml_string: The serialized XML document as a ``str``.
        dtd_path: Path of the DTD file to validate against.

    Returns:
        True if the document is valid against the DTD, False otherwise.
        (Earlier versions returned nothing; callers that ignore the result
        are unaffected.)
    """
    dtd = ET.DTD(dtd_path)
    # Convert the XML string to bytes to handle the encoding declaration
    xml_doc = ET.parse(BytesIO(xml_string.encode('utf-8')))
    is_valid = bool(dtd.validate(xml_doc))
    if is_valid:
        logging.debug("XML is valid against the %s DTD.", dtd_path)
    else:
        logging.error("XML validation failed using DTD %s with error %s:",
                      dtd_path, dtd.error_log.filter_from_errors())
    return is_valid


def _build_items_tree(csv_file):
    """Return an <items> element holding one <item> per data row of csv_file."""
    root = ET.Element("items")
    with open(csv_file, newline='', encoding="utf-8") as csvfile:
        for row in csv.DictReader(csvfile):
            item = ET.SubElement(root, "item")
            for field in _ITEM_FIELDS:
                # row[field] raises KeyError when the CSV header lacks the
                # column, exactly as the explicit per-field lookups did.
                ET.SubElement(item, field).text = row[field]
    return root


def csv_to_xml(csv_file, xml_file, dtd_file):
    """Read the CSV, generate the XML, validate it and save it.

    The XML file is written whether or not validation succeeds; a failed
    validation is reported in the log (error plus a warning) so that the
    final "saved" entry is not mistaken for a clean run.

    Args:
        csv_file: Path of the UTF-8 CSV input; its header row must contain
            every column named in ``_ITEM_FIELDS``.
        xml_file: Path of the XML file to write (overwritten if present).
        dtd_file: Path of the DTD used for validation.

    Raises:
        KeyError: If the CSV header is missing one of the expected columns.
    """
    root = _build_items_tree(csv_file)

    # Serialize with an XML declaration, then work with the text form.
    xml_string = ET.tostring(root,
                             pretty_print=True,
                             xml_declaration=True,
                             encoding="UTF-8").decode("utf-8")

    logging.info("Attempting to validate %s against the %s DTD.",
                 xml_file, dtd_file)
    if not validate_xml(xml_string, dtd_file):
        logging.warning("Saving %s even though it failed validation "
                        "against the %s DTD.", xml_file, dtd_file)

    # Save XML to file
    with open(xml_file, "w", encoding="utf-8") as f:
        f.write(xml_string)

    logging.info("XML file saved to %s", xml_file)


def main(csv_file, xml_file, dtd_file):
    """Code entrypoint: set up logging, then run the conversion.

    Args:
        csv_file: Path of the CSV input file.
        xml_file: Path of the XML output file.
        dtd_file: Path of the DTD used to validate the output.
    """
    setup_logging()
    logging.debug("Attempting conversion of %s to %s using the %s DTD",
                  csv_file, xml_file, dtd_file)
    csv_to_xml(csv_file, xml_file, dtd_file)


if __name__ == "__main__":
    # Fail with a usage message instead of an IndexError traceback when
    # arguments are missing. Extra arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit("Usage: %s <csv_file> <xml_file> <dtd_file>" % sys.argv[0])
    main(sys.argv[1], sys.argv[2], sys.argv[3])