From e606e45bf6ce9d33e930d88602da0a14892aab3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Sat, 26 Apr 2025 22:20:38 +0200 Subject: [PATCH 1/6] add dockerfile and document --- .dockerignore | 10 +++++----- Dockerfile | 12 +++++------- README.md | 12 ++++++++++-- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/.dockerignore b/.dockerignore index 6288a4d..db7e340 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,8 +1,8 @@ .env - -# NODE -node_modules - -# PYTHON __pycache__ .venv +.pytest_cache +docs +node_modules +output +sketching diff --git a/Dockerfile b/Dockerfile index 4dfe203..5447033 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,10 @@ -FROM node:22 +FROM ghcr.io/astral-sh/uv:debian WORKDIR /app -COPY package.json . -COPY package-lock.json . +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen -RUN npm install +COPY . . -COPY index.ts . - -CMD [ "node", "--max-old-space-size=4096", "--experimental-strip-types", "index.ts" ] +CMD ["uv", "run", "transform-documents.py"] \ No newline at end of file diff --git a/README.md b/README.md index d73e166..e831e8f 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,16 @@ Small utility to convert the wikitext data from the Wikivoyage dumps into a structured format. The goal is to make it easier to work with the data and extract useful information programmatically. -## Installation +## Usage +### Docker + +This script is intended to be run with docker. A docker image is [available from the GitHub registry](). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs). + +### Types + +TypeScript types for consuming the json output are available, you may install them from the [@bcye/structured-wikivoyage-types]() npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference. ## Documentation -See [docs](docs) for more information on how to use this utility. \ No newline at end of file + +See [docs](docs) for more information on how to use this utility. From ec408ca34563d25b618e64b8ed5fae6ff7436085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Sat, 26 Apr 2025 22:50:06 +0200 Subject: [PATCH 2/6] specify version and add ulimit to readme --- Dockerfile | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5447033..5a97917 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/astral-sh/uv:debian +FROM ghcr.io/astral-sh/uv:0.6-python3.12-bookworm WORKDIR /app diff --git a/README.md b/README.md index e831e8f..89458be 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Small utility to convert the wikitext data from the Wikivoyage dumps into a stru ### Docker -This script is intended to be run with docker. A docker image is [available from the GitHub registry](). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs). +This script is intended to be run with docker. A docker image is [available from the GitHub registry](). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output --ulimit nofile=65536:65536 ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs). ### Types From 6f1b9cec19edcd3df984012c12e110e60ebbd86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Sat, 26 Apr 2025 23:05:09 +0200 Subject: [PATCH 3/6] add docker action --- .github/workflows/build-image.yaml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/build-image.yaml diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml new file mode 100644 index 0000000..7b8c6dc --- /dev/null +++ b/.github/workflows/build-image.yaml @@ -0,0 +1,29 @@ +name: Build Docker Image + +on: + push: + tags: + - "docker/*" + +jobs: + docker: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push + uses: docker/build-push-action@v6 + with: + push: true + tags: ghcr.io/bcye/structured-wikivoyage-exports:latest From d75cd8f148a36b1870d116ca347d78fccca4222e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Sat, 26 Apr 2025 23:07:53 +0200 Subject: [PATCH 4/6] add permissions to workflow --- .github/workflows/build-image.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build-image.yaml b/.github/workflows/build-image.yaml index 7b8c6dc..1e667cf 100644 --- a/.github/workflows/build-image.yaml +++ b/.github/workflows/build-image.yaml @@ -8,6 +8,10 @@ on: jobs: docker: runs-on: ubuntu-latest + permissions: + packages: write + contents: read + id-token: write steps: - name: Checkout uses: actions/checkout@v4 From ff662c3ac5e89af5f3b9a5618ca252af52e1a6c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Sat, 26 Apr 2025 23:15:29 +0200 Subject: [PATCH 5/6] add links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 89458be..f798781 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,11 @@ Small utility to convert the wikitext data from the Wikivoyage dumps into a stru ### Docker -This script is intended to be run with docker. A docker image is [available from the GitHub registry](). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output --ulimit nofile=65536:65536 ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs). +This script is intended to be run with docker. A docker image is [available from the GitHub registry](https://github.com/bcye/structured-wikivoyage-exports/pkgs/container/structured-wikivoyage-exports). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output --ulimit nofile=65536:65536 ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs). ### Types -TypeScript types for consuming the json output are available, you may install them from the [@bcye/structured-wikivoyage-types]() npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference. +TypeScript types for consuming the json output are available, you may install them from the [@bcye/structured-wikivoyage-types](https://www.npmjs.com/package/@bcye/structured-wikivoyage-types) npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference. ## Documentation From 08cd8b41feb0733470d8e7a819eb04c3a27fa5f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Wed, 30 Apr 2025 14:01:56 +0200 Subject: [PATCH 6/6] reflect new filename (main.py) in docker --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5a97917..161a820 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,4 +7,4 @@ RUN uv sync --frozen COPY . . -CMD ["uv", "run", "transform-documents.py"] \ No newline at end of file +CMD ["uv", "run", "main.py"] \ No newline at end of file