coss_archiving/docker-compose.yaml

96 lines
4.1 KiB
YAML

# Compose definition of the archiving services. The vpn service provides the
# network that nas_sync and db_passthrough join through `network_mode`.
version: "3.9"

services:
  # Creates a connection behind the ETH firewall to access the NAS and Postgres.
  vpn:
    image: wazum/openconnect-proxy:latest
    env_file:
      - ${CONTAINER_DATA}/config/vpn.config
    cap_add:
      - NET_ADMIN
    volumes:
      - /dev/net/tun:/dev/net/tun
    # Alternative to cap_add & volumes: specify `privileged: true`.
    # 5432 is exposed here because db_passthrough uses this service's network;
    # see the note at the end of db_passthrough for details.
    expose: ["5432"]

  # Syncs locally downloaded files with the NAS share on nas22.ethz.ch/...
  nas_sync:
    depends_on:
      - vpn
    # Used to establish a connection to the SMB server from inside the ETH
    # network.
    network_mode: "service:vpn"
    build: nas_sync  # local folder to build
    image: nas_sync:latest
    # Capabilities needed for mounting the SMB share.
    cap_add:
      - SYS_ADMIN
      - DAC_READ_SEARCH
    volumes:
      - ${CONTAINER_DATA}/files:/sync/local_files
      - ${CONTAINER_DATA}/config/nas_sync.config:/sync/nas_sync.config
      - ${CONTAINER_DATA}/config/nas_login.config:/sync/nas_login.config
    command:
      # The first argument is the target mount path.
      - nas22.ethz.ch/gess_coss_1/helbing_support/Archiving-Pipeline
      - lsyncd
      - /sync/nas_sync.config

  # Separate docker container for the pdf download. This hugely improves
  # stability (and creates shorter build times for the containers).
  geckodriver:
    image: selenium/standalone-firefox:latest
    shm_size: 2gb
    environment:
      # HEADFULL (as opposed to headless) is used when supervision is required,
      # e.g. for websites that crash. Both switches fall back to false.
      - START_VNC=${HEADFULL-false}
      - START_XVFB=${HEADFULL-false}
      - SE_VNC_NO_PASSWORD=1
      # - SE_OPTS="--profile /user_data/news_fetch.profile.firefox"
    volumes:
      - ${CONTAINER_DATA}/dependencies:/firefox_profile/
      - ${CODE:-/dev/null}:/code
    # Since the app writes files to the local filesystem, it must be run as the
    # current user. Quoted so the value is always read as a string.
    user: "${U_ID}:${U_ID}"
    expose: ["4444"]  # exposed to other docker-compose services only
    ports:
      - "7900:7900"  # port for webvnc

  # Allows a container on the local network to connect to a service (here
  # Postgres) through the vpn.
  db_passthrough:
    network_mode: "service:vpn"
    image: alpine/socat:latest
    command:
      - "tcp-listen:5432,reuseaddr,fork"
      - "tcp-connect:id-hdb-psgr-cp48.ethz.ch:5432"
    # expose: ["5432"]
    # We would want this passthrough to expose its port to the other
    # containers, BUT since it uses the same network as the vpn service, it
    # can't expose ports on its own. 5432 is therefore exposed under
    # services.vpn.expose.

  # Orchestration of the automatic download. It generates pdfs (via the
  # geckodriver container), fetches descriptions, triggers a snapshot (on
  # archive.org) and writes to a db.
  news_fetch:
    build: news_fetch
    image: news_fetch:latest
    # When using `docker compose run news_fetch`, the dependencies are started
    # as well.
    depends_on:
      - nas_sync
      - geckodriver
      - db_passthrough
    volumes:
      - ${CONTAINER_DATA}:/app/containerdata  # always set
      - ${CODE:-/dev/null}:/code  # not set in prod, defaults to /dev/null
    environment:
      - DEBUG=${DEBUG}
      - UNAME=${UNAME}
    # Since the app writes files to the local filesystem, it must be run as the
    # current user.
    user: "${U_ID}:${U_ID}"
    # By default launches the workers via `python runner.py`; set ENTRYPOINT to
    # override.
    entrypoint: ${ENTRYPOINT:-python runner.py}
    # stdin_open: ${INTERACTIVE:-false}  # docker run -i
    # tty: ${INTERACTIVE:-false}  # docker run -t

  # Creates a small webapp on http://localhost:8080 to check previously
  # generated pdfs (some of which are unusable and must be marked as such).
  news_check:
    build: news_check
    image: news_check:latest
    # Since the app writes files to the local filesystem, it must be run as the
    # current user.
    user: "${U_ID}:${U_ID}"
    depends_on:
      - db_passthrough
    volumes:
      - ${CONTAINER_DATA}:/app/containerdata  # always set
      - ${CODE:-/dev/null}:/code  # not set in prod, defaults to /dev/null
    environment:
      - UNAME=${UNAME}
    ports:
      - "8080:80"  # 80 inside container
    # By default launches `python app.py`; set ENTRYPOINT to override.
    entrypoint: ${ENTRYPOINT:-python app.py}
    tty: true