diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index db7e340..0000000 --- a/.dockerignore +++ /dev/null @@ -1,8 +0,0 @@ -.env -__pycache__ -.venv -.pytest_cache -docs -node_modules -output -sketching diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 1e667cf..9d824a2 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -23,11 +23,15 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Set up QEMU for multi-platform builds + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and push uses: docker/build-push-action@v6 with: + platforms: linux/amd64,linux/arm64 push: true tags: ghcr.io/bcye/structured-wikivoyage-exports:latest diff --git a/.github/workflows/test-parser.yaml b/.github/workflows/test-parser.yaml index e6ee580..70a7c77 100644 --- a/.github/workflows/test-parser.yaml +++ b/.github/workflows/test-parser.yaml @@ -20,4 +20,4 @@ jobs: run: uv sync --locked --dev - name: Run tests - run: PYTHONPATH=. uv run pytest + run: PYTHONPATH=src uv run pytest diff --git a/Dockerfile b/Dockerfile index 110a0a7..92e1d8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,16 @@ -FROM ghcr.io/astral-sh/uv:0.6-python3.12-bookworm +# use python 3.12 as a base image +FROM docker.io/python:3.12-alpine +# use the latest version of uv, independently of the python version +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ WORKDIR /app -COPY pyproject.toml uv.lock ./ +# copy the requirements and install them +COPY pyproject.toml uv.lock . RUN uv sync --frozen -COPY . . +# copy the rest of the code +COPY src ./ RUN chmod +x entrypoint.sh diff --git a/README.md b/README.md index 727acd8..db95710 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ See [docs](docs) for more information on how to use this utility. ## Testing -Run `PYTHONPATH=. pytest` from inside the venv +Run `PYTHONPATH=src pytest` from inside the venv, or directly call `PYTHONPATH=src uv run -- pytest`. ## License diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100644 index 2d7e452..0000000 --- a/entrypoint.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -uv run main.py \ No newline at end of file diff --git a/src/entrypoint.sh b/src/entrypoint.sh new file mode 100644 index 0000000..742561c --- /dev/null +++ b/src/entrypoint.sh @@ -0,0 +1,2 @@ +#!/bin/sh +uv run main.py diff --git a/main.py b/src/main.py similarity index 100% rename from main.py rename to src/main.py diff --git a/output_handlers/__init__.py b/src/output_handlers/__init__.py similarity index 100% rename from output_handlers/__init__.py rename to src/output_handlers/__init__.py diff --git a/output_handlers/base_handler.py b/src/output_handlers/base_handler.py similarity index 100% rename from output_handlers/base_handler.py rename to src/output_handlers/base_handler.py diff --git a/output_handlers/bunny_storage.py b/src/output_handlers/bunny_storage.py similarity index 100% rename from output_handlers/bunny_storage.py rename to src/output_handlers/bunny_storage.py diff --git a/output_handlers/filesystem.py b/src/output_handlers/filesystem.py similarity index 100% rename from output_handlers/filesystem.py rename to src/output_handlers/filesystem.py diff --git a/tests/fixtures/boston_input.txt b/src/tests/fixtures/boston_input.txt similarity index 100% rename from tests/fixtures/boston_input.txt rename to src/tests/fixtures/boston_input.txt diff --git a/tests/fixtures/boston_output.json b/src/tests/fixtures/boston_output.json similarity index 100% rename from tests/fixtures/boston_output.json rename to src/tests/fixtures/boston_output.json diff --git a/tests/test_parser_json_snippets.py b/src/tests/test_parser_json_snippets.py similarity index 100% rename from tests/test_parser_json_snippets.py rename to src/tests/test_parser_json_snippets.py diff --git a/transformers/__init__.py b/src/transformers/__init__.py similarity index 100% rename from transformers/__init__.py rename to src/transformers/__init__.py diff --git a/transformers/fetch_mappings.py b/src/transformers/fetch_mappings.py similarity index 100% rename from transformers/fetch_mappings.py rename to src/transformers/fetch_mappings.py diff --git a/transformers/parser.py b/src/transformers/parser.py similarity index 100% rename from transformers/parser.py rename to src/transformers/parser.py diff --git a/transformers/wiki_dump_handler.py b/src/transformers/wiki_dump_handler.py similarity index 95% rename from transformers/wiki_dump_handler.py rename to src/transformers/wiki_dump_handler.py index c566f44..5b561cd 100644 --- a/transformers/wiki_dump_handler.py +++ b/src/transformers/wiki_dump_handler.py @@ -5,6 +5,7 @@ from .parser import WikivoyageParser logger = getLogger(__name__) + class WikiDumpHandler(xml.sax.ContentHandler): """ SAX handler that, for each whose is in mappings, @@ -92,9 +93,9 @@ class WikiDumpHandler(xml.sax.ContentHandler): async def _process(self, text: str, uid: str, title: str): parser = WikivoyageParser() entry = parser.parse(text) - entry['properties']['title'] = title - + entry["properties"]["title"] = title + # Write to all handlers concurrently - await asyncio.gather(*[ - handler.write_entry(entry, uid) for handler in self.handlers - ]) + await asyncio.gather( + *[handler.write_entry(entry, uid) for handler in self.handlers] + )