Compare commits


No commits in common. "main" and "docker/0.2.0" have entirely different histories.

24 changed files with 250 additions and 5458 deletions


@@ -1,40 +0,0 @@
# Example from https://docs.github.com/en/actions/use-cases-and-examples/publishing-packages/publishing-nodejs-packages#publishing-packages-to-the-npm-registry
name: Publish Types Package to npmjs
on:
push:
tags:
- "types/*"
defaults:
run:
working-directory: types
jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v4
name: Install pnpm
with:
version: 10
run_install: false
# Setup .npmrc file to publish to npm
- uses: actions/setup-node@v4
with:
node-version: "20.x"
cache: "pnpm"
cache-dependency-path: "types/pnpm-lock.yaml"
registry-url: "https://registry.npmjs.org"
- run: pnpm install --frozen-lockfile
- run: pnpm tsc
- run: pnpm publish --provenance --access public --no-git-checks
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}


@@ -1,23 +0,0 @@
on:
pull_request:
jobs:
run-tests:
name: Unit-Test Parser
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v5
- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version-file: ".python-version"
- name: Install the project
run: uv sync --locked --dev
- name: Run tests
run: PYTHONPATH=. uv run pytest


@@ -7,4 +7,4 @@ RUN uv sync --frozen
COPY . .
CMD ["uv", "run", "main.py"]
CMD ["uv", "run", "transform-documents.py"]


@@ -6,28 +6,12 @@ Small utility to convert the wikitext data from the Wikivoyage dumps into a structu
### Docker
This script is intended to be run with docker. A docker image is [available from the GitHub registry](https://github.com/bcye/structured-wikivoyage-exports/pkgs/container/structured-wikivoyage-exports). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output --ulimit nofile=65536:65536 ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs).
This script is intended to be run with docker. A docker image is [available from the GitHub registry](). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output --ulimit nofile=65536:65536 ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs).
### Types
TypeScript types for consuming the JSON output are available; you may install them from the [@bcye/structured-wikivoyage-types](https://www.npmjs.com/package/@bcye/structured-wikivoyage-types) npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference.
TypeScript types for consuming the JSON output are available; you may install them from the [@bcye/structured-wikivoyage-types]() npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference.
## Documentation
See [docs](docs) for more information on how to use this utility.
## Testing
Run `PYTHONPATH=. pytest` from inside the venv
## License
### Code
(c) 2025 bcye and moll-re
All code and documentation unless otherwise stated is licensed under the AGPLv3 license, refer to [LICENSE](LICENSE) for the full license text. The types package and all its code is [licensed under MIT](types/LICENSE).
### Examples
Files in the `docs/example` and `tests/fixtures` directories are copies (.txt) or derivatives (.json) of the Boston article on Wikivoyage, licensed under CC BY-SA 4.0. A [list of contributors is available on the original article](https://en.wikivoyage.org/w/index.php?title=Boston&action=history).
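The JSON trees the exporter emits all share the root/properties/children node shape exercised by the test suite further down this page. As a minimal consumption sketch in Python, assuming the filesystem handler wrote one file per article (the path and wikidata id here are hypothetical):

import json

def walk(node, depth=0):
    # every node carries exactly: type, properties, children
    name = node["properties"].get("name") or node["properties"].get("title", "")
    print("  " * depth + node["type"], name)
    for child in node["children"]:
        walk(child, depth + 1)

with open("output/Q100.json", encoding="utf-8") as f:
    walk(json.load(f))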


@@ -1,5 +0,0 @@
# Types Package
## Publishing new versions
Bump the version in package.json, create a new commit, and tag it with "types/x.y.z"; the version will be published when the tag is pushed to GitHub.

main.py

@@ -1,143 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import re
import zlib
import bz2
import asyncio
import logging
import importlib
import xml.sax
from pathlib import Path
from dotenv import load_dotenv
import aiohttp
from transformers import fetch_mappings, WikiDumpHandler, WikivoyageParser
logger = logging.getLogger(__name__)
def gather_handler_kwargs(handler_name: str) -> dict:
"""
Find all ENV vars starting with HANDLER_<NAME>_ and turn them into kwargs.
E.g. HANDLER_SFTP_HOST=foo → {"host": "foo"}, HANDLER_SFTP_PORT=2222 → {"port": 2222}
"""
prefix = f"HANDLER_{handler_name.upper()}_"
kwargs = {}
for env_key, val in os.environ.items():
if not env_key.startswith(prefix):
continue
param = env_key.replace(prefix, "").lower()
# cast ints
if val.isdigit():
val = int(val)
# cast bools
elif val.lower() in ("true", "false"):
val = val.lower() == "true"
kwargs[param] = val
logger.debug(f"Handler kwargs: {kwargs}")
return kwargs
async def process_dump(
mappings: dict[str, str], handlers
):
"""
Stream-download the bzip2-compressed XML dump and feed to SAX.
"""
xml_url = (
"https://dumps.wikimedia.org/"
"enwikivoyage/latest/"
"enwikivoyage-latest-pages-articles.xml.bz2"
)
decomp = bz2.BZ2Decompressor()
sax_parser = xml.sax.make_parser()
dump_handler = WikiDumpHandler(mappings, handlers)
sax_parser.setContentHandler(dump_handler)
async with aiohttp.ClientSession() as session:
async with session.get(xml_url) as resp:
resp.raise_for_status()
async for chunk in resp.content.iter_chunked(1024 * 1024):
data = decomp.decompress(chunk)
if not data:
continue
text = data.decode("utf-8", errors="ignore")
sax_parser.feed(text)
sax_parser.close()
if dump_handler.tasks:
await asyncio.gather(*dump_handler.tasks)
async def main():
# 1. Which handler(s) to load?
handler_names = os.getenv("HANDLER", "").split(",")
if not handler_names or not handler_names[0]:
logger.error("Error: set ENV HANDLER (e.g. 'filesystem' or 'filesystem,sftp')")
sys.exit(1)
# 2. Read concurrency setting
try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError:
raise ValueError("MAX_CONCURRENT must be an integer")
if max_conc < 0:
raise ValueError("MAX_CONCURRENT must be >= 0")
handlers = []
# 3. Load each handler
for handler_name in handler_names:
handler_name = handler_name.strip()
if not handler_name:
continue
# Dynamic import
module_path = f"output_handlers.{handler_name}"
try:
mod = importlib.import_module(module_path)
except ImportError as e:
logger.error(f"Error loading handler module {module_path}: {e}")
sys.exit(1)
# Find the class: e.g. "sftp" → "SftpHandler"
class_name = handler_name.title().replace("_", "") + "Handler"
if not hasattr(mod, class_name):
logger.error(f"{module_path} defines no class {class_name}")
sys.exit(1)
HandlerCls = getattr(mod, class_name)
logger.info(f"Using handler from {module_path}")
# Build kwargs from ENV
handler_kwargs = gather_handler_kwargs(handler_name)
# Add max_concurrent to kwargs
handler_kwargs["max_concurrent"] = max_conc
# Instantiate
handler = HandlerCls(**handler_kwargs)
handlers.append(handler)
# 4. Fetch mappings
logger.info("Fetching mappings from SQL dump…")
mappings = await fetch_mappings()
logger.info(f"Got {len(mappings)} wikibase_item mappings.")
# 5. Stream & split the XML dump
logger.info("Processing XML dump…")
await process_dump(mappings, handlers)  # concurrency is handled inside each handler
# 6. Finish up
await asyncio.gather(*[handler.close() for handler in handlers])
logger.info("All done.")
if __name__ == "__main__":
load_dotenv()
if os.getenv("DEBUG"):
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
asyncio.run(main())
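To make the HANDLER_<NAME>_* convention above concrete: with gather_handler_kwargs imported from this script, a hypothetical SFTP configuration is picked up like so (host and port values are invented):

import os

os.environ["HANDLER_SFTP_HOST"] = "dumps.example.org"
os.environ["HANDLER_SFTP_PORT"] = "2222"
os.environ["HANDLER_SFTP_FAIL_ON_ERROR"] = "false"
print(gather_handler_kwargs("sftp"))
# prints (key order may vary): {'host': 'dumps.example.org', 'port': 2222, 'fail_on_error': False}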


@@ -1,7 +1,6 @@
"""Reference handler for output handlers."""
from abc import ABC, abstractmethod
import logging
import asyncio
@@ -15,20 +14,15 @@ class BaseHandler(ABC):
_successful_writes = 0
_failed_writes = 0
def __init__(self, fail_on_error: bool = True, max_concurrent=0, **kwargs):
def __init__(self, fail_on_error: bool = True, **kwargs):
"""
Initializes the BaseHandler with optional parameters.
Args:
fail_on_error (bool): If True, the handler will raise an exception on error. Defaults to True.
max_concurrent: Maximum number of concurrent write operations.
0 means unlimited concurrency.
**kwargs: Additional keyword arguments for specific handler implementations.
"""
self.fail_on_error = fail_on_error
self.semaphore = None
if max_concurrent > 0:
self.semaphore = asyncio.Semaphore(max_concurrent)
@abstractmethod
@@ -53,10 +47,6 @@ class BaseHandler(ABC):
entry (dict): The entry to write (will be JSON-encoded).
uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.
"""
if self.semaphore:
async with self.semaphore:
success = await self._write_entry(entry, uid)
else:
success = await self._write_entry(entry, uid)
if success:
self.logger.debug(f"Successfully wrote entry with UID {uid}")
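For reference, the gate this hunk removes is the standard asyncio semaphore pattern; a standalone sketch with an arbitrary limit of 4 in place of max_concurrent:

import asyncio

async def demo():
    sem = asyncio.Semaphore(4)  # cap in-flight writes, as max_concurrent=4 would

    async def fake_write(uid):
        async with sem:  # at most 4 of these bodies run concurrently
            await asyncio.sleep(0.1)  # stand-in for a real _write_entry
            return uid

    print(await asyncio.gather(*(fake_write(i) for i in range(10))))

asyncio.run(demo())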


@@ -10,9 +10,8 @@ class BunnyStorageHandler(BaseHandler):
api_key: str,
fail_on_error: bool = True,
keepalive_timeout: int = 75,
**kwargs,
):
super().__init__(fail_on_error=fail_on_error, **kwargs)
super().__init__(fail_on_error=fail_on_error)
self.base_url = f"https://{region}.bunnycdn.com/{base_path}"
self.headers = {
"AccessKey": api_key,


@@ -12,8 +12,3 @@ dependencies = [
"python-dotenv>=1.1.0",
"wikitextparser>=0.56.3",
]
[dependency-groups]
dev = [
"pytest>=8.3.5",
]

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -1,333 +0,0 @@
import json
import os
import pytest
from transformers import WikivoyageParser
def dump(obj):
# canonical JSON for deep compare
return json.dumps(obj, sort_keys=True, separators=(",", ":"))
def wrap(children):
"""Wrap a list of child nodes in the default root envelope."""
return {
"type": "root",
"properties": {},
"children": children
}
@pytest.fixture
def parser():
return WikivoyageParser()
def test_empty_input_is_root_only(parser):
got = parser.parse("")
assert dump(got) == dump(wrap([]))
def test_plain_text_node(parser):
got = parser.parse("Just some plain text.")
expected = wrap([
{"type":"text","properties":{"markdown":"Just some plain text."},"children":[]}
])
assert dump(got) == dump(expected)
def test_template_node(parser):
got = parser.parse("{{foo|a=1|b=two}}")
expected = wrap([
{
"type":"template",
"properties":{"name":"foo","params":{"a":"1","b":"two"}},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_see_listing_full_properties(parser):
snippet = (
"{{see"
"|name=Statue"
"|alt=Monument"
"|url=http://x"
"|email=a@b.com"
"|address=1 Road"
"|lat=1.23"
"|long=4.56"
"|directions=North"
"|phone=12345"
"|tollfree=800"
"|fax=54321"
"|hours=24/7"
"|price=Free"
"|lastedit=2020-01-01"
"|wikipedia=Statue"
"|wikidata=Q1"
"|content=Big statue"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"see",
"properties":{
"name":"Statue","alt":"Monument","url":"http://x",
"email":"a@b.com","address":"1 Road","lat":"1.23","long":"4.56",
"directions":"North","phone":"12345","tollfree":"800",
"fax":"54321","hours":"24/7","price":"Free",
"lastedit":"2020-01-01","wikipedia":"Statue","wikidata":"Q1",
"content":"Big statue"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_do_listing_full_properties(parser):
snippet = (
"{{do"
"|name=Walk"
"|alt=Stroll"
"|url=http://walk"
"|email=hi@walk"
"|address=Main Street"
"|lat=2.34"
"|long=5.67"
"|directions=East"
"|phone=222-333"
"|tollfree=800-DO-WALK"
"|fax=999-888"
"|hours=All day"
"|price=Free"
"|lastedit=2021-02-02"
"|wikipedia=Walking"
"|wikidata=Q2"
"|content=Enjoy a walk"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"do",
"properties":{
"name":"Walk","alt":"Stroll","url":"http://walk",
"email":"hi@walk","address":"Main Street","lat":"2.34","long":"5.67",
"directions":"East","phone":"222-333","tollfree":"800-DO-WALK",
"fax":"999-888","hours":"All day","price":"Free",
"lastedit":"2021-02-02","wikipedia":"Walking","wikidata":"Q2",
"content":"Enjoy a walk"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_buy_listing_full_properties(parser):
snippet = (
"{{buy"
"|name=Shirt"
"|alt=Tees"
"|url=http://shop"
"|email=sales@shop"
"|address=Market St"
"|lat=3.45"
"|long=6.78"
"|directions=West"
"|phone=444-555"
"|tollfree=800-BUY-TEE"
"|fax=777-666"
"|hours=96"
"|price=$20"
"|lastedit=2022-03-03"
"|wikipedia=Shopping"
"|wikidata=Q3"
"|content=Quality tees"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"buy",
"properties":{
"name":"Shirt","alt":"Tees","url":"http://shop",
"email":"sales@shop","address":"Market St","lat":"3.45","long":"6.78",
"directions":"West","phone":"444-555","tollfree":"800-BUY-TEE",
"fax":"777-666","hours":"96","price":"$20",
"lastedit":"2022-03-03","wikipedia":"Shopping","wikidata":"Q3",
"content":"Quality tees"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_eat_listing_full_properties(parser):
snippet = (
"{{eat"
"|name=Diner"
"|alt=Cafe"
"|url=http://eat"
"|email=food@eat"
"|address=Food Lane"
"|lat=4.56"
"|long=7.89"
"|directions=South"
"|phone=666-777"
"|tollfree=800-EAT-YUM"
"|fax=555-444"
"|hours=Breakfast"
"|price=$10$30"
"|lastedit=2023-04-04"
"|wikipedia=Dining"
"|wikidata=Q4"
"|content=Best pancakes"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"eat",
"properties":{
"name":"Diner","alt":"Cafe","url":"http://eat",
"email":"food@eat","address":"Food Lane","lat":"4.56","long":"7.89",
"directions":"South","phone":"666-777","tollfree":"800-EAT-YUM",
"fax":"555-444","hours":"Breakfast","price":"$10$30",
"lastedit":"2023-04-04","wikipedia":"Dining","wikidata":"Q4",
"content":"Best pancakes"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_drink_listing_full_properties(parser):
snippet = (
"{{drink"
"|name=Pub"
"|alt=Bar"
"|url=http://drink"
"|email=cheers@drink"
"|address=Bar Street"
"|lat=5.67"
"|long=8.90"
"|directions=Center"
"|phone=888-999"
"|tollfree=800-DRINK"
"|fax=333-222"
"|hours=Evening"
"|price=$7$30"
"|lastedit=2024-05-05"
"|wikipedia=Nightlife"
"|wikidata=Q5"
"|content=Great brews"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"drink",
"properties":{
"name":"Pub","alt":"Bar","url":"http://drink",
"email":"cheers@drink","address":"Bar Street","lat":"5.67","long":"8.90",
"directions":"Center","phone":"888-999","tollfree":"800-DRINK",
"fax":"333-222","hours":"Evening","price":"$7$30",
"lastedit":"2024-05-05","wikipedia":"Nightlife","wikidata":"Q5",
"content":"Great brews"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_sleep_listing_full_properties(parser):
snippet = (
"{{sleep"
"|name=Hotel"
"|alt=Inn"
"|url=http://sleep"
"|email=stay@sleep"
"|address=Sleepy Ave"
"|lat=6.78"
"|long=9.01"
"|directions=Uptown"
"|phone=000-111"
"|tollfree=800-SLEEP"
"|fax=111-000"
"|hours=24h"
"|price=$100"
"|lastedit=2025-06-06"
"|wikipedia=Accommodation"
"|wikidata=Q6"
"|checkin=3PM"
"|checkout=11AM"
"|content=Cozy rooms"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"sleep",
"properties":{
"name":"Hotel","alt":"Inn","url":"http://sleep",
"email":"stay@sleep","address":"Sleepy Ave","lat":"6.78","long":"9.01",
"directions":"Uptown","phone":"000-111","tollfree":"800-SLEEP",
"fax":"111-000","hours":"24h","price":"$100",
"lastedit":"2025-06-06","wikipedia":"Accommodation","wikidata":"Q6",
"checkin":"3PM","checkout":"11AM","content":"Cozy rooms"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_generic_listing_full_properties(parser):
snippet = (
"{{listing"
"|name=Info"
"|alt=Data"
"|url=http://info"
"|email=info@info"
"|address=Down St"
"|lat=7.89"
"|long=0.12"
"|directions=Here"
"|phone=123-000"
"|tollfree=800-INFO"
"|fax=000-123"
"|hours=All times"
"|price=$0"
"|lastedit=2026-07-07"
"|wikipedia=InfoPage"
"|wikidata=Q7"
"|content=Useful info"
"}}"
)
got = parser.parse(snippet)
expected = wrap([
{
"type":"listing",
"properties":{
"name":"Info","alt":"Data","url":"http://info",
"email":"info@info","address":"Down St","lat":"7.89","long":"0.12",
"directions":"Here","phone":"123-000","tollfree":"800-INFO",
"fax":"000-123","hours":"All times","price":"$0",
"lastedit":"2026-07-07","wikipedia":"InfoPage","wikidata":"Q7",
"content":"Useful info"
},
"children":[]
}
])
assert dump(got) == dump(expected)
def test_section_and_subsection(parser):
got = parser.parse("Intro\n== First ==\nHello\n=== Sub ===\nWorld")
sec = got["children"][1]
assert sec["type"] == "section" and sec["properties"]["level"] == 2
sub = sec["children"][1]
assert sub["type"] == "section" and sub["properties"]["level"] == 3
def test_full_boston_snapshot(parser):
here = os.path.dirname(__file__)
inp = os.path.join(here, "fixtures", "boston_input.txt")
out = os.path.join(here, "fixtures", "boston_output.json")
wikicode = open(inp, encoding="utf-8").read()
expected = json.load(open(out, encoding="utf-8"))
got = parser.parse(wikicode)
assert dump(got) == dump(expected)
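Outside pytest, the round trip these fixtures exercise takes only a few lines (assuming the transformers package from the main branch is importable):

from transformers import WikivoyageParser

parser = WikivoyageParser()
tree = parser.parse("{{see|name=Statue|lat=1.23|long=4.56}}")
assert tree["type"] == "root"
assert tree["children"][0]["type"] == "see"
assert tree["children"][0]["properties"]["name"] == "Statue"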

transform-documents.py (new file)

@@ -0,0 +1,244 @@
#!/usr/bin/env python3
import os
import sys
import re
import zlib
import bz2
import asyncio
import logging
import importlib
import xml.sax
from pathlib import Path
from dotenv import load_dotenv
import aiohttp
from parser import WikivoyageParser
logger = logging.getLogger(__name__)
def gather_handler_kwargs(handler_name: str) -> dict:
"""
Find all ENV vars starting with HANDLER_<NAME>_ and turn them into kwargs.
E.g. HANDLER_SFTP_HOST=foo → {"host": "foo"}, HANDLER_SFTP_PORT=2222 → {"port": 2222}
"""
prefix = f"HANDLER_{handler_name.upper()}_"
kwargs = {}
for env_key, val in os.environ.items():
if not env_key.startswith(prefix):
continue
param = env_key.replace(prefix, "").lower()
# cast ints
if val.isdigit():
val = int(val)
# cast bools
elif val.lower() in ("true", "false"):
val = val.lower() == "true"
kwargs[param] = val
logger.debug(f"Handler kwargs: {kwargs}")
return kwargs
async def fetch_mappings() -> dict[str, str]:
"""
Download and gunzip the page_props SQL dump, extract
page → wikibase_item mappings.
"""
sql_url = (
"https://dumps.wikimedia.org/"
"enwikivoyage/latest/"
"enwikivoyage-latest-page_props.sql.gz"
)
# decompress gzip
decomp = zlib.decompressobj(16 + zlib.MAX_WBITS)
# regex for tuples: (page,'prop','value',NULL_or_number)
tuple_re = re.compile(r"\((\d+),'([^']+)','([^']+)',(NULL|[\d\.]+)\)")
buffer = ""
mappings: dict[str, str] = {}
async with aiohttp.ClientSession() as session:
async with session.get(sql_url) as resp:
resp.raise_for_status()
async for chunk in resp.content.iter_chunked(1024 * 1024):
data = decomp.decompress(chunk)
if not data:
continue
text = data.decode("utf-8", errors="ignore")
buffer += text
for m in tuple_re.finditer(buffer):
page_id, prop, value = m.group(1), m.group(2), m.group(3)
if prop == "wikibase_item":
mappings[page_id] = value
# keep tail to handle split tuples
if len(buffer) > 1000:
buffer = buffer[-1000:]
return mappings
class WikiDumpHandler(xml.sax.ContentHandler):
"""
SAX handler that, for each <page> whose <id> is in mappings,
collects the <text> and schedules an async task to parse
and write via the user-supplied handler.
"""
def __init__(self, mappings, handler, max_concurrent):
super().__init__()
self.mappings = mappings
self.handler = handler
self.sem = (
asyncio.Semaphore(max_concurrent) if max_concurrent > 0 else None
)
self.tasks: list[asyncio.Task] = []
self.currentTag: str | None = None
self.inPage = False
self.inRevision = False
self.inText = False
self.currentPageId: str | None = None
self.currentText: list[str] = []
def startElement(self, name, attrs):
self.currentTag = name
if name == "page":
self.inPage = True
self.currentPageId = None
self.currentText = []
elif name == "revision":
self.inRevision = True
elif name == "text" and self.inRevision:
self.inText = True
def endElement(self, name):
if name == "page":
pid = self.currentPageId
if pid and pid in self.mappings:
wd_id = self.mappings[pid]
text = "".join(self.currentText)
# schedule processing
if self.sem:
task = asyncio.create_task(self._bounded_process(text, wd_id))
else:
task = asyncio.create_task(self._process(text, wd_id))
self.tasks.append(task)
# reset
self.inPage = self.inRevision = self.inText = False
self.currentPageId = None
self.currentText = []
elif name == "revision":
self.inRevision = False
elif name == "text":
self.inText = False
self.currentTag = None
def characters(self, content):
if not content.strip():
return
if (
self.currentTag == "id"
and self.inPage
and not self.inRevision
and not self.currentPageId
):
self.currentPageId = content.strip()
elif self.inText:
self.currentText.append(content)
async def _process(self, text: str, uid: str):
parser = WikivoyageParser()
entry = parser.parse(text)
await self.handler.write_entry(entry, uid)
async def _bounded_process(self, text: str, uid: str):
# Only run N at once
async with self.sem:
await self._process(text, uid)
async def process_dump(
mappings: dict[str, str], handler, max_concurrent: int
):
"""
Stream-download the bzip2-compressed XML dump and feed to SAX.
"""
xml_url = (
"https://dumps.wikimedia.org/"
"enwikivoyage/latest/"
"enwikivoyage-latest-pages-articles.xml.bz2"
)
decomp = bz2.BZ2Decompressor()
sax_parser = xml.sax.make_parser()
dump_handler = WikiDumpHandler(mappings, handler, max_concurrent)
sax_parser.setContentHandler(dump_handler)
async with aiohttp.ClientSession() as session:
async with session.get(xml_url) as resp:
resp.raise_for_status()
async for chunk in resp.content.iter_chunked(1024 * 1024):
data = decomp.decompress(chunk)
if not data:
continue
text = data.decode("utf-8", errors="ignore")
sax_parser.feed(text)
sax_parser.close()
if dump_handler.tasks:
await asyncio.gather(*dump_handler.tasks)
async def main():
# 1. Which handler to load?
handler_name = os.getenv("HANDLER")
if not handler_name:
logger.error("Error: set ENV HANDLER (e.g. 'filesystem')")
sys.exit(1)
# 2. Dynamic import
module_path = f"output_handlers.{handler_name}"
try:
mod = importlib.import_module(module_path)
except ImportError as e:
logger.error(f"Error loading handler module {module_path}: {e}")
sys.exit(1)
# 3. Find the class: e.g. "sftp" → "SftpHandler"
class_name = handler_name.title().replace("_", "") + "Handler"
if not hasattr(mod, class_name):
logger.error(f"{module_path} defines no class {class_name}")
sys.exit(1)
HandlerCls = getattr(mod, class_name)
logger.info(f"Using handler from {module_path}")
# 4. Build kwargs from ENV
handler_kwargs = gather_handler_kwargs(handler_name)
# 5. Instantiate
handler = HandlerCls(**handler_kwargs)
# 6. Read concurrency setting
try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError:
raise ValueError("MAX_CONCURRENT must be an integer")
if max_conc < 0:
raise ValueError("MAX_CONCURRENT must be >= 0")
# 7. Fetch mappings
logger.info("Fetching mappings from SQL dump…")
mappings = await fetch_mappings()
logger.info(f"Got {len(mappings)} wikibase_item mappings.")
# 8. Stream & split the XML dump
logger.info("Processing XML dump…")
await process_dump(mappings, handler, max_conc)
# 9. Finish up
await handler.close()
logger.info("All done.")
if __name__ == "__main__":
load_dotenv()
if os.getenv("DEBUG"):
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
asyncio.run(main())
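The streaming design above relies on xml.sax's incremental interface: feed() accepts string chunks of any size and close() flushes the tail, so the decompressed dump never has to fit in memory. A toy version, with two literal chunks standing in for decompressed dump data:

import xml.sax

class PageCounter(xml.sax.ContentHandler):
    def __init__(self):
        super().__init__()
        self.pages = 0

    def startElement(self, name, attrs):
        if name == "page":
            self.pages += 1

sax_parser = xml.sax.make_parser()
counter = PageCounter()
sax_parser.setContentHandler(counter)
for chunk in ("<mediawiki><page></page>", "<page></page></mediawiki>"):
    sax_parser.feed(chunk)  # chunk boundaries may fall anywhere, even mid-tag
sax_parser.close()
print(counter.pages)  # 2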


@@ -1,3 +0,0 @@
from .fetch_mappings import fetch_mappings
from .wiki_dump_handler import WikiDumpHandler
from .parser import WikivoyageParser


@@ -1,42 +0,0 @@
from logging import getLogger
import zlib
import re
import aiohttp
logger = getLogger(__name__)
async def fetch_mappings() -> dict[str, str]:
"""
Download and gunzip the page_props SQL dump, extract
page → wikibase_item mappings.
"""
sql_url = (
"https://dumps.wikimedia.org/"
"enwikivoyage/latest/"
"enwikivoyage-latest-page_props.sql.gz"
)
# decompress gzip
decomp = zlib.decompressobj(16 + zlib.MAX_WBITS)
# regex for tuples: (page,'prop','value',NULL_or_number)
tuple_re = re.compile(r"\((\d+),'([^']+)','([^']+)',(NULL|[\d\.]+)\)")
buffer = ""
mappings: dict[str, str] = {}
async with aiohttp.ClientSession() as session:
async with session.get(sql_url) as resp:
resp.raise_for_status()
async for chunk in resp.content.iter_chunked(1024 * 1024):
data = decomp.decompress(chunk)
if not data:
continue
text = data.decode("utf-8", errors="ignore")
buffer += text
for m in tuple_re.finditer(buffer):
page_id, prop, value = m.group(1), m.group(2), m.group(3)
if prop == "wikibase_item":
logger.debug(f"Found mapping {page_id} -> {value}")
mappings[page_id] = value
# keep tail to handle split tuples
if len(buffer) > 1000:
buffer = buffer[-1000:]
return mappings
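On a made-up fragment of a page_props INSERT statement, tuple_re extracts rows like this (only wikibase_item rows are kept by the loop above):

import re

tuple_re = re.compile(r"\((\d+),'([^']+)','([^']+)',(NULL|[\d\.]+)\)")
fragment = "(12,'wikibase_item','Q100',NULL),(12,'page_image_free','Boston.jpg',0.5)"
for page_id, prop, value, _ in tuple_re.findall(fragment):
    print(page_id, prop, value)
# 12 wikibase_item Q100
# 12 page_image_free Boston.jpg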


@@ -1,100 +0,0 @@
from logging import getLogger
import xml.sax
import asyncio
from .parser import WikivoyageParser
logger = getLogger(__name__)
class WikiDumpHandler(xml.sax.ContentHandler):
"""
SAX handler that, for each <page> whose <id> is in mappings,
collects the <text> and schedules an async task to parse
and write via the user-supplied handler(s).
"""
def __init__(self, mappings, handlers):
super().__init__()
self.mappings = mappings
# Support a single handler or a list of handlers
self.handlers = handlers
self.tasks: list[asyncio.Task] = []
self.currentTag: str | None = None
self.inPage = False
self.inRevision = False
self.inText = False
self.currentPageId: str | None = None
self.currentTitle: str | None = None
self.currentText: list[str] = []
def startElement(self, name, attrs):
self.currentTag = name
if name == "page":
logger.debug("start page")
self.inPage = True
self.currentPageId = None
self.currentTitle = None
self.currentText = []
elif name == "revision":
logger.debug("start revision")
self.inRevision = True
elif name == "text" and self.inRevision:
logger.debug("start text")
self.inText = True
def endElement(self, name):
if name == "page":
logger.debug("end page")
pid = self.currentPageId
if pid and pid in self.mappings:
wd_id = self.mappings[pid]
text = "".join(self.currentText)
title = self.currentTitle
logger.debug(f"scheduled {wd_id} for handling")
# schedule processing
task = asyncio.create_task(self._process(text, wd_id, title))
self.tasks.append(task)
else:
logger.debug(f"page {pid} without wikidata id, skipping...")
# reset
self.inPage = self.inRevision = self.inText = False
self.currentPageId = None
self.currentTitle = None
self.currentText = []
elif name == "revision":
logger.debug("end revision")
self.inRevision = False
elif name == "text":
logger.debug("end text")
self.inText = False
self.currentTag = None
def characters(self, content):
# Only filter whitespace for ID fields, preserve all content for text
if (
self.currentTag == "id"
and self.inPage
and not self.inRevision
and not self.currentPageId
):
content_stripped = content.strip()
if content_stripped: # Only process non-empty ID content
self.currentPageId = content_stripped
elif self.currentTag == "title" and self.inPage:
if self.currentTitle is None:
self.currentTitle = content
else:
self.currentTitle += content
elif self.inText:
# Always append text content, even if it's just whitespace or newlines
self.currentText.append(content)
async def _process(self, text: str, uid: str, title: str):
parser = WikivoyageParser()
entry = parser.parse(text)
entry['properties']['title'] = title
# Write to all handlers concurrently
await asyncio.gather(*[
handler.write_entry(entry, uid) for handler in self.handlers
])
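A minimal end-to-end wiring of this handler, assuming WikiDumpHandler from this module is in scope, with an invented one-page document and a stub in place of a real output handler (page id 42 and Q100 are made up):

import asyncio
import xml.sax

class StubHandler:
    async def write_entry(self, entry, uid):
        print(uid, entry["properties"]["title"])

async def demo():
    dump_handler = WikiDumpHandler({"42": "Q100"}, [StubHandler()])
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(dump_handler)
    sax_parser.feed(
        "<mediawiki><page><id>42</id><title>Boston</title>"
        "<revision><id>7</id><text>'''Boston''' is big.</text></revision></page></mediawiki>"
    )
    sax_parser.close()
    await asyncio.gather(*dump_handler.tasks)  # one task per mapped page

asyncio.run(demo())  # prints: Q100 Boston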


@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2025 bcye and moll-re
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@@ -1,3 +0,0 @@
# @bcye/structured-wikivoyage-types
Types to use when consuming json trees from the structured-wikivoyage-exports project


@@ -1,29 +0,0 @@
{
"name": "@bcye/structured-wikivoyage-types",
"version": "0.2.5",
"description": "Types to use when consuming json trees from the structured-wikivoyage-exports project",
"keywords": [],
"contributors": [
"bcye",
"moll-re"
],
"license": "MIT",c
"repository": {
"type": "git",
"url": "git+https://github.com/bcye/structured-wikivoyage-exports.git"
},
"bugs": {
"url": "https://github.com/bcye/structured-wikivoyage-exports/issues"
},
"homepage": "https://github.com/bcye/structured-wikivoyage-exports#readme",
"files": [
"dist/index.d.ts",
"dist/index.js"
],
"main": "dist/index.js",
"types": "dist/index.d.ts",
"private": false,
"devDependencies": {
"typescript": "^5.8.3"
}
}

types/pnpm-lock.yaml (generated)

@@ -1,24 +0,0 @@
lockfileVersion: '9.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
importers:
.:
devDependencies:
typescript:
specifier: ^5.8.3
version: 5.8.3
packages:
typescript@5.8.3:
resolution: {integrity: sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==}
engines: {node: '>=14.17'}
hasBin: true
snapshots:
typescript@5.8.3: {}


@@ -1,113 +0,0 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "es2016" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "libReplacement": true, /* Enable lib replacement. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "commonjs" /* Specify what module code is generated. */,
// "rootDir": "./", /* Specify the root folder within your source files. */
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "noUncheckedSideEffectImports": true, /* Check side effect imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
"declaration": true /* Generate .d.ts files from TypeScript and JavaScript files in your project. */,
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist/" /* Specify an output folder for all emitted files. */,
// "removeComments": true, /* Disable emitting comments. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
// "erasableSyntaxOnly": true, /* Do not allow runtime constructs that are not part of ECMAScript. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */,
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true /* Ensure that casing is correct in imports. */,
/* Type Checking */
"strict": true /* Enable all strict type-checking options. */,
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}

uv.lock (generated)

@@ -135,15 +135,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
]
[[package]]
name = "cryptography"
version = "44.0.2"
@@ -248,15 +239,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 },
]
[[package]]
name = "mapvoyage-extract"
version = "0.1.0"
@@ -270,11 +252,6 @@ dependencies = [
{ name = "wikitextparser" },
]
[package.dev-dependencies]
dev = [
{ name = "pytest" },
]
[package.metadata]
requires-dist = [
{ name = "aiofiles", specifier = ">=24.1.0" },
@@ -285,9 +262,6 @@ requires-dist = [
{ name = "wikitextparser", specifier = ">=0.56.3" },
]
[package.metadata.requires-dev]
dev = [{ name = "pytest", specifier = ">=8.3.5" }]
[[package]]
name = "multidict"
version = "6.4.3"
@@ -361,24 +335,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/78/09/24c2f37524a3ebc3574975766748c7e4423ecefaa815c9fc4a324cbcf94a/mwparserfromhell-0.6.6-cp312-cp312-win_amd64.whl", hash = "sha256:cdc46c115b2495d4025920b7b30a6885a96d2b797ccc4009bf3cc02940ae55d3", size = 101071 },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 },
]
[[package]]
name = "pluggy"
version = "1.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]
[[package]]
name = "propcache"
version = "0.3.1"
@@ -445,21 +401,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 },
]
[[package]]
name = "pytest"
version = "8.3.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 },
]
[[package]]
name = "python-dotenv"
version = "1.1.0"