# coss_archiving/docker-compose.yaml

version: "3.9"
services:
  vpn: # Creates a connection behind the ETH firewall to access the NAS and Postgres
    image: wazum/openconnect-proxy:latest
    environment:
      - OPENCONNECT_URL=${OPENCONNECT_URL}
      - OPENCONNECT_USER=${OPENCONNECT_USER}
      - OPENCONNECT_PASSWORD=${OPENCONNECT_PASSWORD}
      - OPENCONNECT_OPTIONS=${OPENCONNECT_OPTIONS}
    cap_add:
      - NET_ADMIN
    volumes:
      - /dev/net/tun:/dev/net/tun
    # alternative to cap_add & volumes: specify privileged: true
    expose: ["5432"] # exposed here because db_passthrough uses this network; see that service below for details

  geckodriver: # Separate container for the PDF downloads. This greatly improves stability (and shortens build times for the other containers)
    image: selenium/standalone-firefox:latest
    shm_size: 2gb
    environment:
      - START_VNC=${HEADFULL-false} # as opposed to headless; used when supervision is required (e.g. for websites that crash)
      - START_XVFB=${HEADFULL-false}
      - SE_VNC_NO_PASSWORD=1
    volumes:
      - ${CONTAINER_DATA}/dependencies:/firefox_profile/
      - ${CODE:-/dev/null}:/code
    user: ${U_ID}:${U_ID} # since the app writes files to the local filesystem, it must run as the current user
    expose: ["4444"] # exposed to the other docker-compose services only
    ports:
      - 7900:7900 # port for the web VNC viewer
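
  # news_fetch reaches this container through the Selenium Remote WebDriver endpoint at
  # http://geckodriver:4444. A minimal client sketch, assuming Selenium 4 on the Python side:
  #   from selenium import webdriver
  #   driver = webdriver.Remote("http://geckodriver:4444", options=webdriver.FirefoxOptions())
  # The VNC viewer for supervision (HEADFULL mode) is served on the host at http://localhost:7900.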

  db_passthrough: # Allows containers on the local network to connect to a service (here Postgres) through the vpn
    network_mode: "service:vpn"
    image: alpine/socat:latest
    command: ["tcp-listen:5432,reuseaddr,fork", "tcp-connect:${DB_HOST}:5432"]
    # expose: ["5432"] We would want this passthrough to expose its port to the other containers,
    # BUT since it shares the network of the vpn service it cannot expose ports on its own.
    # Port 5432 is therefore exposed under services.vpn.expose above.
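
  # Because this service runs inside the vpn service's network namespace, the other containers
  # reach Postgres through the hostname "vpn" on port 5432, e.g. a connection string such as
  # postgresql://user:password@vpn:5432/dbname (illustrative; the actual values are presumably
  # configured in the mounted config/container.yaml).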

  news_fetch: # Orchestrates the automatic download: generates PDFs (via the geckodriver container), fetches descriptions, triggers a snapshot on archive.org and writes to a db
    build: news_fetch
    image: news_fetch:latest
    depends_on: # when using docker compose run news_fetch, the dependencies are started as well
      - geckodriver
      - db_passthrough
    volumes:
      - ${CONTAINER_DATA}:/app/containerdata # always set
      - ./config/container.yaml:/app/config.yaml
      - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null
    environment:
      - CONFIG_FILE=/app/config.yaml
      - DEBUG=${DEBUG}
      - UNAME=${UNAME}
    user: ${U_ID}:${U_ID} # since the app writes files to the local filesystem, it must run as the current user
    entrypoint: ${ENTRYPOINT:-python runner.py} # by default launch the workers as defined in the Dockerfile
    # stdin_open: ${INTERACTIVE:-false} # docker run -i
    # tty: ${INTERACTIVE:-false} # docker run -t
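
  # Typical invocation (docker compose run also starts geckodriver and db_passthrough):
  #   docker compose run --rm news_fetch
  # For an interactive shell instead of the default workers, override the entrypoint (illustrative):
  #   ENTRYPOINT=/bin/sh docker compose run --rm news_fetch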

  news_check: # Serves a small webapp on http://localhost:8080 to check previously generated PDFs (some of which are unusable and must be marked as such)
    build: news_check
    image: news_check:latest
    user: ${U_ID}:${U_ID} # since the app writes files to the local filesystem, it must run as the current user
    depends_on:
      - db_passthrough
    volumes:
      - ${CONTAINER_DATA}:/app/containerdata # always set
      - ./config/container.yaml:/app/config.yaml
      - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null
    environment:
      - CONFIG_FILE=/app/config.yaml
      - UNAME=${UNAME}
    ports:
      - "8080:80" # 80 inside the container
    entrypoint: ${ENTRYPOINT:-python app.py} # by default launch the webapp as defined in the Dockerfile
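
  # Typical invocation:
  #   docker compose up -d news_check
  # then open http://localhost:8080 in a browser (db_passthrough and the vpn it runs on are started as dependencies).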

  nas_sync: # One-shot job that copies the generated files onto the NAS (mounted below as the coss_smb_share CIFS volume)
    image: alpine:latest
    volumes:
      - ${CONTAINER_DATA}/files:/sync/local_files
      - coss_smb_share:/sync/remote_files
    command:
      - /bin/sh
      - -c
      - |
        apk add rsync
        # NOTE: -n makes this a dry run; drop the flag to actually copy the files
        rsync -av --no-perms --no-owner --no-group --progress /sync/local_files/${SYNC_FOLDER}/ /sync/remote_files/${SYNC_FOLDER} -n
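
  # Illustrative invocation (SYNC_FOLDER selects the subfolder of containerdata/files to copy):
  #   SYNC_FOLDER=<subfolder> docker compose run --rm nas_sync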

volumes:
  coss_smb_share:
    driver: local
    driver_opts:
      type: cifs
      o: "addr=${NAS_HOST},nounix,file_mode=0777,dir_mode=0777,domain=D,username=${NAS_USERNAME},password=${NAS_PASSWORD}"
      device: //${NAS_HOST}${NAS_PATH}
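
# The NAS_* values are interpolated from the environment / .env file. Since the device is built as
# //<NAS_HOST><NAS_PATH>, NAS_PATH must start with a slash, e.g. NAS_PATH=/coss/archive (illustrative).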