import mwparserfromhell as mwp
import mwparserfromhell.nodes as nodes
import re
import json
from typing import Dict, List, Any, Optional, Union, Tuple
import os
from pathlib import Path
import sys

class WikivoyageParser:
    """Turn Wikivoyage wikitext into a nested tree of plain dicts.

    Every node has the shape ``{"type", "properties", "children"}``:

    * ``root`` - the document; ``properties`` maps the lowercased name of
      each document-wide template to that template's parameters.
    * ``section`` - one per heading, with ``title`` and ``level`` properties.
    * listing nodes - typed after the lowercased listing template name.
    * ``template`` - any other template, with ``name`` and ``params``.
    * ``text`` - running text converted to Markdown, stored under
      ``markdown``.

    An instance is reusable: every :meth:`parse` call starts a fresh tree.
    """

    def __init__(self):
        # Templates whose parameters are stored on the root node instead of
        # being emitted as children of the current section.
        self.document_templates = [
            "pagebanner", "mapframe", "routebox", "geo", "isPartOf",
            "usablecity", "guidecity", "outlinecity"
        ]
        # Templates that become a child node typed after the template name.
        self.listing_templates = ["see", "do", "buy", "eat", "drink", "sleep", "listing"]
        self.root = self._new_root()
        # Node that currently receives text, listings and other templates.
        self.current_section = self.root

    @staticmethod
    def _new_root() -> Dict[str, Any]:
        """Return an empty root node."""
        return {
            "type": "root",
            "properties": {},
            "children": []
        }

    @staticmethod
    def _is_known(template_name: str, known_names) -> bool:
        """Return True if lowercased *template_name* is in *known_names*.

        The comparison lowercases the configured names as well, so entries
        such as ``"isPartOf"`` match the already-lowercased template name.
        """
        return any(template_name == known.lower() for known in known_names)

    @staticmethod
    def _extract_params(template_node) -> Dict[str, str]:
        """Map stripped parameter names to stripped parameter values.

        When a name occurs more than once, the last occurrence wins.
        """
        return {
            str(param.name).strip(): str(param.value).strip()
            for param in template_node.params
        }

    def parse(self, wikitext: str) -> Dict:
        """Parse *wikitext* and return the root node of the resulting tree.

        Any tree built by a previous call is discarded first.
        """
        self.root = self._new_root()
        self.current_section = self.root

        self._process_nodes(mwp.parse(wikitext))

        return self.root

    def _process_nodes(self, wikicode):
        """Walk the top-level nodes of *wikicode* and populate the tree.

        Headings and templates are structural: each one first flushes the
        text accumulated so far into a ``text`` node. Comments are dropped.
        Every other node is converted to Markdown and appended to the
        pending text.
        """
        pending = []

        def flush():
            # _add_text_node itself ignores whitespace-only text.
            if pending:
                self._add_text_node("".join(pending))
                pending.clear()

        for node in wikicode.nodes:
            if isinstance(node, nodes.heading.Heading):
                flush()
                self._handle_heading(node)
            elif isinstance(node, nodes.template.Template):
                flush()
                self._handle_template(node)
            elif isinstance(node, nodes.comment.Comment):
                continue
            else:
                pending.append(self._inline_to_markdown(node))

        flush()

    def _inline_to_markdown(self, node) -> str:
        """Convert a single inline node to its Markdown text.

        Text, tags, wikilinks and external links get dedicated handling;
        anything else falls back to the node's string representation.
        """
        if isinstance(node, nodes.text.Text):
            return str(node.value)
        if isinstance(node, nodes.tag.Tag):
            return self._convert_tag_to_markdown(node)
        if isinstance(node, nodes.wikilink.Wikilink):
            return self._convert_wikilink_to_markdown(node)
        if isinstance(node, nodes.external_link.ExternalLink):
            return self._convert_external_link_to_markdown(node)
        return str(node)

    def _add_text_node(self, text: str):
        """Append a ``text`` node holding stripped *text* to the current section.

        Whitespace-only text is ignored so no empty nodes are created.
        """
        stripped = text.strip()
        if not stripped:
            return

        self.current_section["children"].append({
            "type": "text",
            "properties": {
                "markdown": stripped
            },
            "children": []
        })

    def _handle_heading(self, heading_node):
        """Open a new section for *heading_node* and make it current.

        The section is attached to the most recent existing section whose
        level is strictly lower; if there is none (or the heading is level
        1) it is attached to the root.
        """
        level = heading_node.level
        section = {
            "type": "section",
            "properties": {
                "title": str(heading_node.title).strip(),
                "level": level
            },
            "children": []
        }

        parent = self.root
        if level > 1:
            # Scan backwards through document order for the nearest section
            # that can contain this one.
            parent = next(
                (
                    candidate
                    for candidate in reversed(self._get_all_sections())
                    if candidate["properties"]["level"] < level
                ),
                self.root,
            )

        parent["children"].append(section)
        self.current_section = section

    def _get_all_sections(self):
        """Return every section node in the order it appears in the tree."""
        sections = []

        def collect(node):
            # Only section children are followed, so text/listing/template
            # nodes are never descended into.
            for child in node["children"]:
                if child["type"] == "section":
                    sections.append(child)
                    collect(child)

        collect(self.root)
        return sections

    def _handle_template(self, template_node):
        """Dispatch *template_node* to the handler matching its name.

        Names are compared case-insensitively against
        ``document_templates`` and ``listing_templates``; anything else is
        treated as a generic template.
        """
        template_name = str(template_node.name).strip().lower()

        if self._is_known(template_name, self.document_templates):
            self._handle_document_template(template_node)
        elif self._is_known(template_name, self.listing_templates):
            self._handle_listing_template(template_node)
        else:
            self._handle_other_template(template_node)

    def _handle_document_template(self, template_node):
        """Store a document-wide template's parameters on the root node.

        The parameters are keyed by the lowercased template name; a later
        occurrence of the same template replaces the earlier one.
        """
        template_name = str(template_node.name).strip().lower()
        self.root["properties"][template_name] = self._extract_params(template_node)

    def _handle_listing_template(self, template_node):
        """Append a listing node (see, do, buy, ...) to the current section.

        All parameters are copied as stripped strings, except ``content``,
        whose wikicode is converted to Markdown.
        """
        template_name = str(template_node.name).strip().lower()

        properties = {}
        for param in template_node.params:
            name = str(param.name).strip()
            if name == "content":
                value = self._convert_wikicode_to_markdown(param.value)
            else:
                value = str(param.value).strip()
            properties[name] = value

        self.current_section["children"].append({
            "type": template_name,
            "properties": properties,
            "children": []
        })

    def _handle_other_template(self, template_node):
        """Append a generic ``template`` node to the current section."""
        generic_node = {
            "type": "template",
            "properties": {
                "name": str(template_node.name).strip().lower(),
                "params": self._extract_params(template_node)
            },
            "children": []
        }

        self.current_section["children"].append(generic_node)

    def _convert_wikicode_to_markdown(self, wikicode) -> str:
        """Convert every node of *wikicode* to Markdown and strip the result."""
        return "".join(
            self._inline_to_markdown(node) for node in wikicode.nodes
        ).strip()

    def _convert_tag_to_markdown(self, tag_node) -> str:
        """Convert a tag node to Markdown.

        Non-empty tag contents are converted recursively so nested markup
        is handled. Tags without a Markdown mapping are replaced by their
        converted contents.
        """
        tag = str(tag_node.tag).lower()
        content = str(tag_node.contents)

        if tag_node.contents:
            content = self._convert_wikicode_to_markdown(tag_node.contents)

        if tag in ('b', 'strong'):
            return f"**{content}**"
        if tag in ('i', 'em'):
            return f"*{content}*"
        if tag == 'u':
            return f"_{content}_"
        if tag in ('strike', 's', 'del'):
            return f"~~{content}~~"
        if tag == 'code':
            return f"`{content}`"
        if tag == 'pre':
            return f"```\n{content}\n```"
        if tag == 'br':
            return "\n"
        if tag == 'hr':
            return "\n---\n"
        if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            # The digit after "h" is the heading depth.
            level = int(tag[1])
            return f"\n{'#' * level} {content}\n"
        if tag == 'a':
            # The first href attribute wins.
            href = ""
            for attr in tag_node.attributes:
                if str(attr.name).lower() == 'href':
                    href = str(attr.value)
                    break
            return f"[{content}]({href})"
        if tag == 'img':
            # No early exit here: the last src/alt attribute wins.
            src = alt = ""
            for attr in tag_node.attributes:
                attr_name = str(attr.name).lower()
                if attr_name == 'src':
                    src = str(attr.value)
                elif attr_name == 'alt':
                    alt = str(attr.value)
            return f"![{alt}]({src})"

        return content

    def _convert_wikilink_to_markdown(self, wikilink_node) -> str:
        """Convert a wikilink to ``[label](title)``.

        The label is the link text when present, otherwise the title.
        """
        title = str(wikilink_node.title)
        label = str(wikilink_node.text) if wikilink_node.text else title
        return f"[{label}]({title})"

    def _convert_external_link_to_markdown(self, link_node) -> str:
        """Convert an external link to ``[title](url)``, or the bare URL if untitled."""
        url = str(link_node.url)

        if link_node.title:
            return f"[{str(link_node.title)}]({url})"
        return url

    def export_json(self, root=None, indent=2) -> str:
        """Serialise *root* (default: the last parsed tree) to a JSON string."""
        if root is None:
            root = self.root

        return json.dumps(root, indent=indent)

def process_file(input_file: Path, parser: WikivoyageParser) -> None:
    """Convert one wikitext file to JSON, written next to it.

    The output path is *input_file* with its suffix replaced by ``.json``.
    Any exception raised while reading, parsing or writing is caught and
    reported with a printed message, so one bad file does not stop a batch.
    """
    destination = input_file.with_suffix('.json')

    # Make sure the directory that will hold the output is present.
    destination.parent.mkdir(parents=True, exist_ok=True)

    try:
        wikitext = input_file.read_text(encoding='utf-8')

        # The parser keeps the resulting tree; export_json() serialises it.
        parser.parse(wikitext)

        with destination.open('w', encoding='utf-8') as sink:
            sink.write(parser.export_json())

    except Exception as e:
        print(f"Error processing {input_file}: {e}")

def main():
    """Convert every ``.txt`` file under a directory tree to JSON.

    The directory is the first command-line argument, or the current
    directory when no argument is given.
    """
    # A single parser instance is shared by all files.
    parser = WikivoyageParser()

    cli_args = sys.argv[1:]
    input_dir = Path(cli_args[0] if cli_args else '.')

    # rglob descends into subdirectories as well.
    for txt_file in input_dir.rglob('*.txt'):
        print(f"Processing {txt_file}")
        process_file(txt_file, parser)

    print("Processing complete")

# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()