#!/usr/bin/env python3
import argparse
import io
import json
import os.path
import re
import sys
import urllib.request

import pdfminer.high_level
import pdfminer.layout


# URL of the AVR instruction set manual (PDF).  The document is downloaded
# and parsed by this script, and the same URL is used as the base of the
# per-instruction "url" entries written to the generated file.
FILE = ("https://ww1.microchip.com/downloads/en/DeviceDoc/"
        "AVR-InstructionSet-Manual-DS40002198.pdf")

# Matches one instruction section of the extracted text:
#   "6.<n> <MNEMONIC> [(<MNEMONIC_2>)] - <name>"   (hyphen or en dash)
# followed by the sub-heading "6.<n>.1 Description" (via the \1
# back-reference) and then, lazily, everything up to "Operation:".
# Named groups: mnemonic, mnemonic_2 (optional), name, description.
section_regex = re.compile(r"^(6\.\d{1,3}?)\s+?(?P<mnemonic>\w+?)\s+?(?:\((?P<mnemonic_2>\w+?)\)\s+?)?[-\u2013]\s+?(?P<name>.+?)\s*?$\s+?\1\.1\s+?Description\s+(?P<description>(?s:.+?))\s+?Operation:", re.MULTILINE)
# Matches the page footer/header text ("<word>-page <n> ... Manual ...
# (c) 2021 Microchip Technology Inc. ... AVR(R) Instruction Set Manual ...
# Instruction Description") so it can be stripped out of descriptions.
# NOTE(review): the "." after "Inc" is unescaped and therefore matches any
# character, and the copyright year is hard-coded to 2021.
header_footer_regex = re.compile(r"\s+?\w+?-page \d{1,3}?\s+?Manual\s+?\u00a9 2021 Microchip Technology Inc.\s+?AVR\u00ae Instruction Set Manual\s+?Instruction Description\s*", re.MULTILINE)
# Captures the digits of a "<word>-page <n>" marker (group 1).
page_num_regex = re.compile(r"\b\w+?-page (\d{1,3})")


class Instruction:
    """Plain record holding what is known about a single instruction.

    Only the mnemonic is required; every other attribute starts with a
    placeholder value and is meant to be overwritten by the caller.
    """

    def __init__(self, mnemonic):
        # The name falls back to the mnemonic until a real one is assigned.
        self.mnemonic = self.name = mnemonic
        # Description text; empty until assigned.
        self.description = ""
        # Placeholder page number used in the documentation link.
        self.page = 2
        # Optional alternative mnemonic; empty string means "none".
        self.mnemonic_2 = ""


def main():
    """Run the whole pipeline: parse arguments, fetch, parse, write."""
    # Arguments are parsed first so that bad usage fails before the download.
    output_path = get_arguments().output
    manual_text = get_docs_as_string(FILE)
    write_script(output_path, parse_docs(manual_text))


def get_arguments():
    """Parse the command line and return the resulting namespace.

    The only option is ``-o/--output``: the path of the file to write.  It
    defaults to ``lib/handlers/asm-docs-avr.js`` two directories above the
    directory containing this script (symlinks resolved).
    """
    here = os.path.dirname(os.path.realpath(__file__))
    # Plain concatenation followed by normpath collapses the ".." parts.
    fallback = os.path.normpath(here + "/../../lib/handlers/asm-docs-avr.js")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-o",
        "--output",
        default=fallback,
        help="the location to which the script will be written",
    )
    return parser.parse_args()


def get_docs_as_string(url):
    """Download the PDF at *url* and return its extracted text.

    The whole document is read into memory and handed to pdfminer, with
    layout analysis configured as ``LAParams(boxes_flow=None)``.
    """
    with urllib.request.urlopen(url) as response:
        log_message(f"reading PDF from {url}...")
        raw_pdf = response.read()

    layout_params = pdfminer.layout.LAParams(boxes_flow=None)
    with io.BytesIO(raw_pdf) as pdf_stream:
        log_message("extracting text from PDF...")
        text = pdfminer.high_level.extract_text(
            pdf_stream, laparams=layout_params
        )
    return text


def parse_docs(docs):
    """Extract the instructions described in the manual text *docs*.

    Every match of ``section_regex`` is examined in document order.  The
    first match for a mnemonic creates an ``Instruction`` filled with the
    section's name, cleaned-up description and page number; later matches
    for the same mnemonic reuse that entry.  Any match that carries a
    parenthesised second mnemonic stores it on the entry.

    Returns a dict mapping mnemonic -> ``Instruction`` in order of first
    appearance.
    """
    instructions = {}
    log_message("searching for pattern matches...")
    for match in section_regex.finditer(docs):
        mnemonic = match.group("mnemonic")
        instr = instructions.get(mnemonic)
        if instr is None:
            instr = Instruction(mnemonic)
            instr.name = match.group("name")
            instr.description = process_description(match.group("description"))
            # The first page marker at or after the section start gives the
            # page number.  search() returns None when no marker follows;
            # in that case keep the Instruction's default page instead of
            # crashing on ``None.group``.
            page_match = page_num_regex.search(docs, match.start())
            if page_match is not None:
                instr.page = page_match.group(1)
            else:
                log_message(f"no page number found for {mnemonic}")
            instructions[mnemonic] = instr
        mnemonic_2 = match.group("mnemonic_2")
        if mnemonic_2:
            instr.mnemonic_2 = mnemonic_2
    return instructions


def process_description(desc):
    """Clean up a raw description block taken from the extracted PDF text.

    Three passes are applied in order:
    1. page header/footer text is deleted;
    2. each newline that is not adjacent to another newline becomes a
       space, so only blank-line separated paragraphs remain;
    3. lines consisting of just one or two words, or a single character,
       and followed by a blank line are dropped (diagram leftovers).
    """
    single_newline = r"(?<!\n)\n(?!\n)"
    diagram_leftover = r"^(?:(?:\b\w+?\b\s*?){1,2}|.)$\n{2}"

    without_footer = header_footer_regex.sub("", desc)
    paragraphs = re.sub(single_newline, " ", without_footer,
                        flags=re.MULTILINE)
    return re.sub(diagram_leftover, "", paragraphs, flags=re.MULTILINE)


def _js_string(text):
    """Return *text* as a double-quoted JavaScript string literal.

    JSON string syntax is valid JavaScript, so quotes, backslashes and
    control characters (e.g. stray newlines) are escaped correctly.
    Non-ASCII characters are emitted unchanged.
    """
    return json.dumps(text, ensure_ascii=False)


def write_script(filename, instructions):
    """Write the JavaScript lookup module for *instructions* to *filename*.

    The generated file exports ``getAsmOpcode(opcode)``, a ``switch`` over
    the upper-cased opcode.  Each instruction contributes a ``case`` for its
    mnemonic (plus one for ``mnemonic_2`` when set) returning an object with
    ``html`` (the description, blank-line separated paragraphs wrapped in
    ``<p>`` elements), ``tooltip`` (the name) and ``url`` (``FILE`` with a
    ``#page=`` fragment).

    *instructions* is a mapping whose values provide ``mnemonic``,
    ``mnemonic_2``, ``name``, ``description`` and ``page`` attributes.
    """
    log_message(f"writing to {filename}...")
    indent = 16 * " "
    # The text comes from a PDF and contains non-ASCII characters, so do
    # not depend on the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as script:
        script.write("export function getAsmOpcode(opcode) {\n")
        script.write("    if (!opcode) return;\n")
        script.write("    switch (opcode.toUpperCase()) {\n")
        for inst in instructions.values():
            script.write(f"        case {_js_string(inst.mnemonic)}:\n")
            if inst.mnemonic_2:
                script.write(f"        case {_js_string(inst.mnemonic_2)}:\n")
            html = "<p>" + inst.description.replace("\n\n", "</p><p>") + "</p>"
            url = f"{FILE}#page={inst.page}"
            script.write("            return {\n")
            # Values go through _js_string so that quotes, backslashes or
            # leftover newlines cannot break the generated string literals.
            script.write(f"{indent}\"html\": {_js_string(html)},\n")
            script.write(f"{indent}\"tooltip\": {_js_string(inst.name)},\n")
            script.write(f"{indent}\"url\": {_js_string(url)},\n")
            script.write(12 * " " + "};\n\n")
        script.write("    }\n}")


def log_message(msg):
    """Print *msg* to standard error, prefixed with the program name."""
    program = sys.argv[0]
    print(program, msg, sep=": ", file=sys.stderr)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
