diff --git a/.dockerignore b/.dockerignore
index 6288a4d..db7e340 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,8 +1,8 @@
 .env
-
-# NODE
-node_modules
-
-# PYTHON
 __pycache__
 .venv
+.pytest_cache
+docs
+node_modules
+output
+sketching
diff --git a/Dockerfile b/Dockerfile
index 4dfe203..5447033 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,10 @@
-FROM node:22
+FROM ghcr.io/astral-sh/uv:debian
 
 WORKDIR /app
 
-COPY package.json .
-COPY package-lock.json .
+COPY pyproject.toml uv.lock ./
+RUN uv sync --frozen
 
-RUN npm install
+COPY . .
 
-COPY index.ts .
-
-CMD [ "node", "--max-old-space-size=4096", "--experimental-strip-types", "index.ts" ]
+CMD ["uv", "run", "transform-documents.py"]
diff --git a/README.md b/README.md
index d73e166..e831e8f 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,16 @@
 
 Small utility to convert the wikitext data from the Wikivoyage dumps into a structured format. The goal is to make it easier to work with the data and extract useful information programmatically.
 
-## Installation
+## Usage
 
+### Docker
+
+This script is intended to be run with Docker. A Docker image is [available from the GitHub registry](https://ghcr.io/bcye/structured-wikivoyage-exports). For example, you may run it using the filesystem handler with `docker run -e HANDLER=filesystem -e HANDLER_FILESYSTEM_OUTPUT_DIR=/output -v ./output:/output ghcr.io/bcye/structured-wikivoyage-exports`. For all the different options, refer to [the docs](docs).
+
+### Types
+
+TypeScript types for consuming the JSON output are available; you may install them from the [@bcye/structured-wikivoyage-types](https://www.npmjs.com/package/@bcye/structured-wikivoyage-types) npm package. Refer to the included docstrings in [types/index.d.ts](types/index.d.ts) for reference.
 
 ## Documentation
-See [docs](docs) for more information on how to use this utility.
\ No newline at end of file
+
+See [docs](docs) for more information on how to use this utility.