Reduced Slack functionality, higher ease of use. Database migration WIP
		| @@ -1,25 +1,8 @@ | ||||
| # Usage: | ||||
| # docker compose --env-file env/<mode> run <args> news_fetch && docker compose --env-file env/production down | ||||
|  | ||||
| version: "3.9" | ||||
|  | ||||
| services: | ||||
|  | ||||
|   geckodriver: | ||||
|     image: selenium/standalone-firefox:103.0 | ||||
|     volumes: | ||||
|       - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} | ||||
|       - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority | ||||
|     environment: | ||||
|       - DISPLAY=$DISPLAY | ||||
|       - START_VNC=false | ||||
|       - START_XVFB=false | ||||
|     user: 1001:1001 | ||||
|     expose: # exposed to other docker-compose services only | ||||
|     - "4444" | ||||
|  | ||||
|  | ||||
|   vpn: | ||||
|   vpn: # Creates a connection behind the ETH Firewall to access NAS and Postgres | ||||
|     image: wazum/openconnect-proxy:latest | ||||
|     env_file: | ||||
|       - ${CONTAINER_DATA}/config/vpn.config | ||||
| @@ -28,13 +11,14 @@ services: | ||||
|     volumes: | ||||
|       - /dev/net/tun:/dev/net/tun | ||||
|     # alternative to cap_add & volumes: specify privileged: true | ||||
|     expose: ["5432"] # exposed here because db_passhtrough uses this network. See below for more details | ||||
|      | ||||
|  | ||||
|   nas_sync: | ||||
|   nas_sync: # Syncs locally downloaded files with the NAS-share on nas22.ethz.ch/... | ||||
|     depends_on: | ||||
|       - vpn # used to establish a connection to the SMB server | ||||
|     network_mode: "service:vpn" | ||||
|     build: nas_sync | ||||
|       - vpn | ||||
|     network_mode: "service:vpn" # used to establish a connection to the SMB server from inside ETH network | ||||
|     build: nas_sync # local folder to build | ||||
|     image: nas_sync:latest | ||||
|     cap_add: # capabilities needed for mounting the SMB share | ||||
|       - SYS_ADMIN | ||||
| @@ -49,33 +33,56 @@ services: | ||||
|       - /sync/nas_sync.config | ||||
|  | ||||
|  | ||||
|   news_fetch: | ||||
|   geckodriver: # separate docker container for pdf-download. This hugely improves stability (and shortens build times for the other containers) | ||||
|     image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated) | ||||
|     environment: | ||||
|       - START_VNC=${HEADFULL-false} # as opposed to headless; used when supervision is required (e.g. for websites that crash) | ||||
|       - START_XVFB=${HEADFULL-false} | ||||
|     expose: ["4444"] # exposed to other docker-compose services only | ||||
|     ports: | ||||
|       - 7900:7900 # port for webvnc | ||||
|  | ||||
|  | ||||
|   db_passthrough: # Allows a container on the local network to connect to a service (here postgres) through the vpn | ||||
|     network_mode: "service:vpn" | ||||
|     image: alpine/socat:latest | ||||
|     command: ["tcp-listen:5432,reuseaddr,fork", "tcp-connect:id-hdb-psgr-cp48.ethz.ch:5432"] | ||||
|     # expose: ["5432"] We would want this passthrough to expose its ports to the other containers | ||||
|     # BUT since it uses the same network as the vpn-service, it can't expose ports on its own. 5432 is therefore exposed under service.vpn.expose  | ||||
|  | ||||
|  | ||||
|   news_fetch: # Orchestration of the automatic download. It generates pdfs (via the geckodriver container), fetches descriptions, triggers a snapshot (on archive.org) and writes to a db | ||||
|     build: news_fetch | ||||
|     image: news_fetch:latest | ||||
|  | ||||
|     depends_on: # when using docker compose run news_fetch, the dependencies are started as well | ||||
|       - nas_sync | ||||
|       - geckodriver | ||||
|       - db_passthrough | ||||
|  | ||||
|     volumes: | ||||
|       - ${CONTAINER_DATA}:/app/containerdata # always set | ||||
|       - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null | ||||
|     environment: | ||||
|       - DEBUG=${DEBUG} | ||||
|       - HEADLESS=${HEADLESS} | ||||
|       - REDUCEDFETCH=${REDUCEDFETCH} | ||||
|     entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile | ||||
|     stdin_open: ${INTERACTIVE:-false} # docker run -i | ||||
|     tty: ${INTERACTIVE:-false}        # docker run -t | ||||
|       - UNAME=${UNAME} | ||||
|     entrypoint: ${ENTRYPOINT:-python runner.py} # by default launch workers as defined in the Dockerfile | ||||
|     # stdin_open: ${INTERACTIVE:-false} # docker run -i | ||||
|     # tty: ${INTERACTIVE:-false}        # docker run -t | ||||
|  | ||||
|  | ||||
|   news_check: | ||||
|   news_check: # Creates a small webapp on http://localhost:8080 to check previously generated pdfs (some of which are unusable and must be marked as such) | ||||
|     build: news_check | ||||
|     image: news_check:latest | ||||
|  | ||||
|     # user: 1001:1001 # since the app writes files to the local filesystem, it must be run as the current user | ||||
|     depends_on: | ||||
|       - db_passthrough | ||||
|     volumes: | ||||
|       - ${CONTAINER_DATA}:/app/containerdata # always set | ||||
|       - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null | ||||
|  | ||||
|     environment: | ||||
|       - UNAME=${UNAME} | ||||
|     ports: | ||||
|       - 8080:80 # 80 inside container | ||||
|       - "8080:80" # 80 inside container | ||||
|     entrypoint: ${ENTRYPOINT:-python app.py} # by default launch workers as defined in the Dockerfile | ||||
|     tty: true | ||||
|   | ||||
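The db_passthrough construction deserves a note: because socat runs inside the vpn service's network namespace, other containers reach Postgres simply by connecting to the hostname `vpn` on port 5432, and socat relays the connection to id-hdb-psgr-cp48.ethz.ch through the tunnel. A minimal client sketch (database name and user follow the test script later in this commit; the password is a placeholder):

    import peewee

    # "vpn" resolves to the vpn service; its exposed port 5432 is the socat relay
    db = peewee.PostgresqlDatabase(
        "coss_archiving",
        user="ca_rw",
        password="<see db.config.ini>",  # placeholder
        host="vpn",
        port=5432,
    )
    db.connect()
    print(db.get_tables())
    db.close()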
							
								
								
									
env/debug: 2 changes (vendored)
| @@ -3,8 +3,6 @@ | ||||
| CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving | ||||
|  | ||||
| CODE=./ | ||||
| XAUTHORTIY=$XAUTHORTIY | ||||
| XSOCK=/tmp/.X11-unix | ||||
|  | ||||
| DEBUG=true | ||||
| CHECK=false | ||||
|   | ||||
							
								
								
									
launch: 44 changes (new file)
| @@ -0,0 +1,44 @@ | ||||
| #!/bin/bash | ||||
| set -e | ||||
| set -o ignoreeof | ||||
|  | ||||
| echo "Bash script launching COSS_ARCHIVING..." | ||||
|  | ||||
|  | ||||
| # CHANGE ME! | ||||
| export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving | ||||
| export UNAME=remy | ||||
|  | ||||
|  | ||||
| if [[ $1 == "debug" ]] | ||||
| then | ||||
|     export DEBUG=true | ||||
|     export HEADFULL=true | ||||
|     export CODE=./ | ||||
|     export ENTRYPOINT=/bin/bash | ||||
|     # since service-ports alone is not enough here, also execute up, which will start all services in the background (publishing their ports, e.g. webvnc) | ||||
|     docker compose up -d | ||||
| elif [[ $1 == "production" ]] | ||||
| then | ||||
|     export DEBUG=false | ||||
| elif [[ $1 == "build" ]] | ||||
| then | ||||
|     export DEBUG=false | ||||
|     docker compose build | ||||
|     exit 0 | ||||
| elif [[ $1 == "down" ]] | ||||
| then | ||||
|     docker compose stop | ||||
|     exit 0 | ||||
| else | ||||
|     echo "Please specify the execution mode (debug/production/build) as the first argument" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| shift # consumes the variable set in $1 so that $@ only contains the remaining arguments | ||||
|  | ||||
| docker compose run -it --service-ports "$@" | ||||
|  | ||||
| echo "Docker run finished, shutting down containers..." | ||||
| docker compose stop | ||||
| echo "Bye!" | ||||
| @@ -15,7 +15,7 @@ runner.configuration.models.set_db( | ||||
|     runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"),  # chat_db (not needed here) | ||||
|     runner.configuration.SqliteDatabase("../.dev/media_downloads.db") | ||||
| ) | ||||
| runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "../.dev/" | ||||
| runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/" | ||||
|  | ||||
|  | ||||
| def fetch(): | ||||
|   | ||||
| @@ -1,25 +1,25 @@ | ||||
| FROM node:18.8 as build-deps | ||||
|  | ||||
| WORKDIR /app | ||||
| WORKDIR /app/client | ||||
| COPY client/package.json ./ | ||||
| COPY client/package-lock.json ./ | ||||
| COPY client/rollup.config.js ./ | ||||
| COPY client/src ./src/ | ||||
| RUN npm install | ||||
| RUN npm run build | ||||
|  | ||||
|  | ||||
| FROM python:latest | ||||
| ENV TZ Europe/Zurich | ||||
|  | ||||
| RUN apt-get update && apt-get install -y postgresql | ||||
|  | ||||
| RUN mkdir -p /app/news_check | ||||
|  | ||||
| COPY requirements.txt /app/requirements.txt | ||||
| RUN python3 -m pip install -r /app/requirements.txt | ||||
| COPY --from=build-deps /app/public /app/news_check/public | ||||
|  | ||||
| COPY app /app/news_check | ||||
| WORKDIR /app/news_check | ||||
|  | ||||
| CMD gunicorn app:app -w 1 --threads 2 -b 0.0.0.0:80 | ||||
| COPY requirements.txt requirements.txt | ||||
| RUN python3 -m pip install -r requirements.txt | ||||
|  | ||||
| COPY client/public/index.html client/public/index.html | ||||
| COPY --from=build-deps /app/client/public client/public/ | ||||
| COPY server server/ | ||||
|  | ||||
| WORKDIR /app/news_check/server | ||||
| # CMD python app.py | ||||
| @@ -4,10 +4,9 @@ | ||||
| 	<meta charset='utf-8'> | ||||
| 	<meta name='viewport' content='width=device-width,initial-scale=1'> | ||||
|  | ||||
| 	<title>Svelte app</title> | ||||
| 	<title>NEWS CHECK</title> | ||||
|  | ||||
| 	<link rel='icon' type='image/png' href='/favicon.png'> | ||||
| 	<link rel='stylesheet' href='/global.css'> | ||||
| 	<link rel='icon' type='image/png' href='https://ethz.ch/etc/designs/ethz/img/icons/ETH-APP-Icons-Theme-white/192-xxxhpdi.png'> | ||||
| 	<link rel='stylesheet' href='/build/bundle.css'> | ||||
|  | ||||
| 	<script defer src='/build/bundle.js'></script> | ||||
| @@ -16,7 +15,7 @@ | ||||
| 	<script src="https://cdn.tailwindcss.com"></script> | ||||
|  | ||||
| 	<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script> | ||||
| 	<html data-theme="light"></html> | ||||
| 	<html data-theme="light"></html> <!-- Daisy-ui theme --> | ||||
|  | ||||
| </head> | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| flask | ||||
| peewee | ||||
| markdown | ||||
| gunicorn | ||||
| psycopg2 | ||||
| @@ -1,10 +0,0 @@ | ||||
| # Svelte.js + Flask | ||||
|  | ||||
| A super simple example of using Flask to serve a Svelte app and use it as a backend server. | ||||
|  | ||||
| Run the following for development: | ||||
|  | ||||
| - `python server.py` to start the Flask server. | ||||
| - `cd client; npm install; npm run autobuild` to automatically build and reload the Svelte frontend when it's changed. | ||||
|  | ||||
| This example just queries the Flask server for a random number. | ||||
| @@ -1,4 +1,3 @@ | ||||
| from crypt import methods | ||||
| import json | ||||
| from flask import Flask, send_from_directory, jsonify | ||||
| import random | ||||
| @@ -7,7 +6,8 @@ app = Flask(__name__) | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # SVELTE BACKEND. Always send index.html and the requested js-files. (compiled by npm) | ||||
| # SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm) | ||||
|  | ||||
| @app.route("/") #index.html | ||||
| def base(): | ||||
|     return send_from_directory('../client/public', 'index.html') | ||||
| @@ -17,6 +17,7 @@ def home(path): | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # API for news_check. | ||||
|  | ||||
| @@ -34,4 +35,4 @@ def set_article(id): | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     app.run(debug=True) | ||||
|     app.run(host="0.0.0.0", port=80) | ||||
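Since docker-compose maps the container's port 80 to 8080 on the host, the rebuilt frontend can be smoke-tested from outside the container. A quick sketch (assumes the stack is up; the API routes themselves are defined in the parts of app.py not shown in this diff):

    import requests

    # the root route serves index.html via send_from_directory
    r = requests.get("http://localhost:8080/")
    assert r.status_code == 200
    assert "<html" in r.text.lower()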
							
								
								
									
news_check/server/package-lock.json: 3 changes (generated)
| @@ -1,3 +0,0 @@ | ||||
| { | ||||
|   "lockfileVersion": 1 | ||||
| } | ||||
							
								
								
									
news_check/test.py: 20 changes (new file)
| @@ -0,0 +1,20 @@ | ||||
| import peewee | ||||
|  | ||||
| db = peewee.PostgresqlDatabase('coss_archiving', user='ca_rw', password='pleasechangeit', host='vpn', port=5432) | ||||
| # db.connect() | ||||
|  | ||||
|  | ||||
| class Pet(peewee.Model): | ||||
|     name = peewee.CharField() | ||||
|     animal_type = peewee.CharField() | ||||
|  | ||||
|     class Meta: | ||||
|         database = db # this model uses the shared coss_archiving database | ||||
| with db: | ||||
|     db.create_tables([Pet]) | ||||
| db.get_tables() | ||||
|  | ||||
| t = Pet.create(name="Test", animal_type="test") | ||||
|  | ||||
| for pet in Pet.select(): | ||||
|     print(pet.name) | ||||
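test.py doubles as a smoke test for the migration: it connects to the new Postgres database through the vpn passthrough, creates a throwaway table, inserts a row and reads it back. Note that every run inserts another Pet row; a cleanup along these lines (assuming the database is a scratch one) keeps repeated runs idempotent:

    db.drop_tables([Pet])  # discard the test table again
    db.close()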
| @@ -1,2 +1,2 @@ | ||||
| .dev/ | ||||
| Dockerfile | ||||
| __pycache__/ | ||||
| @@ -3,25 +3,18 @@ FROM python:latest | ||||
| ENV TZ Europe/Zurich | ||||
|  | ||||
|  | ||||
| RUN apt-get update && apt-get install -y \ | ||||
| evince \ | ||||
| # for checking | ||||
| xauth \ | ||||
| #for gui | ||||
| ghostscript | ||||
| # for compression | ||||
|  | ||||
|  | ||||
| RUN useradd --create-home --shell /bin/bash --uid 1001 autonews | ||||
| # id mapped to local user | ||||
| # home directory needed for pip package installation | ||||
| ENV PATH=/home/autonews/.local/bin:$PATH | ||||
|  | ||||
|  | ||||
| RUN mkdir -p /app/auto_news | ||||
| RUN chown -R autonews:autonews /app | ||||
| USER autonews | ||||
|  | ||||
| COPY requirements.txt /app/requirements.txt | ||||
| RUN python3 -m pip install -r /app/requirements.txt | ||||
|  | ||||
| COPY app /app/auto_news | ||||
| COPY . /app/auto_news | ||||
| WORKDIR /app/auto_news | ||||
|   | ||||
| @@ -1,59 +0,0 @@ | ||||
| from dataclasses import dataclass | ||||
| import os | ||||
| import shutil | ||||
| import configparser | ||||
| import logging | ||||
| from datetime import datetime | ||||
| from peewee import SqliteDatabase | ||||
| from rich.logging import RichHandler | ||||
|  | ||||
| # first things first: logging | ||||
| logging.basicConfig( | ||||
|     format='%(message)s', | ||||
|     level=logging.INFO, | ||||
|     datefmt='%H:%M:%S', # add %Y-%m-%d if needed | ||||
|     handlers=[RichHandler()] | ||||
|     ) | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| # load config file containing constants and secrets | ||||
| parsed = configparser.ConfigParser() | ||||
| parsed.read("/app/containerdata/config/news_fetch.config.ini") | ||||
|  | ||||
| if os.getenv("DEBUG", "false") == "true": | ||||
|     logger.warning("Found 'DEBUG=true', setting up dummy databases") | ||||
|      | ||||
|     db_base_path = parsed["DATABASE"]["db_path_dev"] | ||||
|     parsed["SLACK"]["archive_id"] = parsed["SLACK"]["debug_id"] | ||||
|     parsed["MAIL"]["recipient"] = parsed["MAIL"]["sender"] | ||||
|     parsed["DOWNLOADS"]["local_storage_path"] = parsed["DATABASE"]["db_path_dev"] | ||||
| else: | ||||
|     logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...") | ||||
|     db_base_path = parsed["DATABASE"]["db_path_prod"] | ||||
|     logger.info("Backing up databases") | ||||
|     backup_dst = parsed["DATABASE"]["db_backup"] | ||||
|     today = datetime.today().strftime("%Y.%m.%d") | ||||
|     shutil.copyfile( | ||||
|         os.path.join(db_base_path, parsed["DATABASE"]["chat_db_name"]),  | ||||
|         os.path.join(backup_dst, today + "." + parsed["DATABASE"]["chat_db_name"]),  | ||||
|         ) | ||||
|     shutil.copyfile( | ||||
|         os.path.join(db_base_path, parsed["DATABASE"]["download_db_name"]),  | ||||
|         os.path.join(backup_dst, today + "." + parsed["DATABASE"]["download_db_name"]),  | ||||
|         ) | ||||
|  | ||||
|  | ||||
| from utils_storage import models | ||||
|  | ||||
| # Set up the database | ||||
| models.set_db( | ||||
|     SqliteDatabase( | ||||
|         os.path.join(db_base_path, parsed["DATABASE"]["chat_db_name"]), | ||||
|         pragmas = {'journal_mode': 'wal'} # multiple threads can read at once | ||||
|     ), | ||||
|     SqliteDatabase( | ||||
|         os.path.join(db_base_path, parsed["DATABASE"]["download_db_name"]), | ||||
|         pragmas = {'journal_mode': 'wal'} # multiple threads can read at once | ||||
|     ) | ||||
| ) | ||||
| @@ -1,285 +0,0 @@ | ||||
| import logging | ||||
| import configuration | ||||
| import requests | ||||
| import os | ||||
| import time | ||||
| from threading import Thread | ||||
| from slack_sdk.errors import SlackApiError | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
| config = configuration.parsed["SLACK"] | ||||
| models = configuration.models | ||||
| slack_client = "dummy" | ||||
| LATEST_RECORDED_REACTION = 0 | ||||
|  | ||||
|  | ||||
| def init(client) -> None: | ||||
|     """Starts fetching past messages and returns the freshly launched thread""" | ||||
|     global slack_client | ||||
|     slack_client = client | ||||
|  | ||||
|     global LATEST_RECORDED_REACTION | ||||
|     try: | ||||
|         LATEST_RECORDED_REACTION = models.Reaction.select(models.Reaction.id).order_by("id")[-1] | ||||
|     except IndexError: #query is actually empty, we have never fetched any messages until now | ||||
|         LATEST_RECORDED_REACTION = 0     | ||||
|      | ||||
|     # fetch all the messages we could have possibly missed | ||||
|     logger.info("Querying missed messages, threads and reactions. This can take some time.") | ||||
|     fetch_missed_channel_messages() # not threaded | ||||
|     t = Thread(target = fetch_missed_channel_reactions, daemon=True) # threaded, runs in background (usually takes a long time) | ||||
|     t.start() | ||||
|  | ||||
|     if os.getenv("REDUCEDFETCH", "false") == "true": | ||||
|         logger.warning("Only fetching empty threads for bot messages because 'REDUCEDFETCH=true'") | ||||
|         fetch_missed_thread_messages(reduced=True) | ||||
|     else: # perform both asynchronously | ||||
|         fetch_missed_thread_messages() | ||||
|      | ||||
|  | ||||
|  | ||||
| def get_unhandled_messages(): | ||||
|     """Gets all messages that have not yet been handled, be it by mistake or by downtime | ||||
|     As the message handler makes no distinction between channel messages and thread messages, | ||||
|     we don't have to worry about them here. | ||||
|     """ | ||||
|  | ||||
|     threaded_objects = [] | ||||
|     for t in models.Thread.select(): | ||||
|         if t.message_count > 1: # if only one message was written, it is the channel message | ||||
|             msg = t.last_message | ||||
|             if msg.is_by_human: | ||||
|                 threaded_objects.append(msg) | ||||
|             # else don't, nothing to process | ||||
|     logger.info(f"Set {len(threaded_objects)} thread-messages as not yet handled.") | ||||
|  | ||||
|  | ||||
|     channel_objects = [t.initiator_message for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)] | ||||
|     logger.info(f"Set {len(channel_objects)} channel-messages as not yet handled.") | ||||
|      | ||||
|     reaction_objects = list(models.Reaction.select().where(models.Reaction.id > LATEST_RECORDED_REACTION)) | ||||
|     logger.info(f"Set {len(reaction_objects)} reactions as not yet handled.") | ||||
|     # the ones newer than the last before the fetch | ||||
|      | ||||
|     all_messages = channel_objects + threaded_objects | ||||
|     return all_messages, reaction_objects | ||||
|  | ||||
|  | ||||
| def fetch_missed_channel_messages(): | ||||
|     # latest processed message_ts is: | ||||
|     presaved = models.Message.select().order_by(models.Message.ts) | ||||
|     if not presaved: | ||||
|         last_ts = 0 | ||||
|     else: | ||||
|         last_message = presaved[-1] | ||||
|         last_ts = last_message.slack_ts | ||||
|      | ||||
|     result = slack_client.conversations_history( | ||||
|         channel=config["archive_id"], | ||||
|         oldest=last_ts | ||||
|     ) | ||||
|  | ||||
|     new_messages = result.get("messages", []) | ||||
|     # # filter the last one, it is a duplicate! (only if the db is not empty!) | ||||
|     # if last_ts != 0 and len(new_messages) != 0: | ||||
|     #     new_messages.pop(-1) | ||||
|  | ||||
|     new_fetches = 0 | ||||
|     for m in new_messages: | ||||
|         # print(m) | ||||
|         message_dict_to_model(m) | ||||
|         new_fetches += 1 | ||||
|  | ||||
|     refetch = result.get("has_more", False) | ||||
|     while refetch: # we have not actually fetched them all | ||||
|         try: | ||||
|             result = slack_client.conversations_history( | ||||
|                 channel = config["archive_id"], | ||||
|                 cursor = result["response_metadata"]["next_cursor"], | ||||
|                 oldest = last_ts | ||||
|             ) # fetches 100 messages, older than the [-1](=oldest) element of new_fetches | ||||
|             refetch = result.get("has_more", False) | ||||
|  | ||||
|             new_messages = result.get("messages", []) | ||||
|             for m in new_messages: | ||||
|                 message_dict_to_model(m) | ||||
|                 new_fetches += 1 | ||||
|         except SlackApiError: # Most likely a rate-limit | ||||
|             logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"])) | ||||
|             time.sleep(config["api_wait_time"]) | ||||
|             refetch = True | ||||
|      | ||||
|     logger.info(f"Fetched {new_fetches} new channel messages.") | ||||
|  | ||||
|  | ||||
| def fetch_missed_thread_messages(reduced=False): | ||||
|     """After having gotten all base-threads, we need to fetch all their replies"""         | ||||
|     # I don't know of a better way: we need to fetch this for each and every thread (except if it is marked as permanently solved) | ||||
|     logger.info("Starting fetch of thread messages...") | ||||
|     if reduced: | ||||
|         threads = [t for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)] | ||||
|         # this only fetches completely empty threads, which might be because the bot-message was not yet saved to the db. | ||||
|         # once we got all the bot-messages the remaining empty threads will be the ones we need to process. | ||||
|     else: | ||||
|         threads = [t for t in models.Thread.select() if not t.is_fully_processed] | ||||
|     logger.info(f"Fetching history for {len(threads)} empty threads") | ||||
|     new_messages = [] | ||||
|     for i,t in enumerate(threads): | ||||
|         try: | ||||
|             messages = slack_client.conversations_replies( | ||||
|                 channel = config["archive_id"], | ||||
|                 ts = t.slack_ts, | ||||
|                 oldest = t.messages[-1].slack_ts | ||||
|             )["messages"] | ||||
|         except SlackApiError: | ||||
|             logger.error("Hit rate limit while querying threaded messages, retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads))) | ||||
|             time.sleep(int(config["api_wait_time"])) | ||||
|             messages = slack_client.conversations_replies( | ||||
|                 channel = config["archive_id"], | ||||
|                 ts = t.slack_ts, | ||||
|                 oldest = t.messages[-1].slack_ts | ||||
|             )["messages"] | ||||
|  | ||||
|         messages.pop(0) # the first message is the one posted in the channel. We already processed it! | ||||
|          | ||||
|         for m in messages: | ||||
|             # only append *new* messages | ||||
|             res = message_dict_to_model(m) | ||||
|             if res: | ||||
|                 new_messages.append(res) | ||||
|     logger.info("Fetched {} new threaded messages.".format(len(new_messages))) | ||||
|  | ||||
|  | ||||
| def fetch_missed_channel_reactions(): | ||||
|     logger.info("Starting background fetch of channel reactions...") | ||||
|     threads = [t for t in models.Thread.select() if not t.is_fully_processed] | ||||
|     for i,t in enumerate(threads): | ||||
|         reactions = [] | ||||
|         try: | ||||
|             query = slack_client.reactions_get( | ||||
|                 channel = config["archive_id"], | ||||
|                 timestamp = t.slack_ts | ||||
|             ) | ||||
|             reactions = query.get("message", {}).get("reactions", []) # defaults prevent a crash when the message is missing | ||||
|         except SlackApiError as e: | ||||
|             if e.response.get("error", "") == "message_not_found": | ||||
|                 m = t.initiator_message | ||||
|                 logger.warning(f"Message (id={m.id}) not found. Skipping and saving...") | ||||
|                 # this usually means the message is past the 1000 message limit imposed by slack. Mark it as processed in the db | ||||
|                 m.is_processed_override = True | ||||
|                 m.save() | ||||
|             else: # probably a rate_limit: | ||||
|                 logger.error("Hit rate limit while querying reactions. retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads))) | ||||
|                 time.sleep(int(config["api_wait_time"])) | ||||
|  | ||||
|         for r in reactions: | ||||
|             reaction_dict_to_model(r, t) | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| # Helpers for message conversion to db-objects | ||||
| def reaction_dict_to_model(reaction, thread=None): | ||||
|     if thread is None: | ||||
|         m_ts = reaction["item"]["ts"] | ||||
|         message = models.Message.get(ts = float(m_ts)) | ||||
|         thread = message.thread | ||||
|     if "name" in reaction.keys(): # fetched through manual api query | ||||
|         content = reaction["name"] | ||||
|     elif "reaction" in reaction.keys(): # fetched through events | ||||
|         content = reaction["reaction"] | ||||
|     else: | ||||
|         logger.error(f"Weird reaction received: {reaction}") | ||||
|         return None | ||||
|  | ||||
|     r, _ = models.Reaction.get_or_create( | ||||
|         type = content, | ||||
|         message = thread.initiator_message | ||||
|     ) | ||||
|     logger.info("Saved reaction [{}]".format(content)) | ||||
|     return r | ||||
|  | ||||
|  | ||||
| def message_dict_to_model(message): | ||||
|     if message["type"] == "message": | ||||
|         thread_ts = message["thread_ts"] if "thread_ts" in message else message["ts"] | ||||
|         uid = message.get("user", "BAD USER") | ||||
|         if uid == "BAD USER": | ||||
|             logger.critical("Message has no user?? {}".format(message)) | ||||
|             return None | ||||
|          | ||||
|         user, _ = models.User.get_or_create(user_id = uid) | ||||
|         thread, _ = models.Thread.get_or_create(thread_ts = thread_ts) | ||||
|         m, new = models.Message.get_or_create( | ||||
|             user = user, | ||||
|             thread = thread, | ||||
|             ts = message["ts"], | ||||
|             channel_id = config["archive_id"], | ||||
|             text = message["text"] | ||||
|         ) | ||||
|         logger.info(f"Saved: {m} ({'new' if new else 'old'})") | ||||
|  | ||||
|         files = message.get("files", []) | ||||
|         if len(files) >= 1: | ||||
|             f = files[0] #default: [] | ||||
|             m.file_type = f["filetype"] | ||||
|             m.perma_link = f["url_private_download"] | ||||
|             m.save() | ||||
|             logger.info(f"Saved {m.file_type}-file for message (id={m.id})") | ||||
|         if new: | ||||
|             return m | ||||
|         else: | ||||
|             return None | ||||
|     else: | ||||
|         logger.warning("What should I do of {}".format(message)) | ||||
|         return None | ||||
|  | ||||
|  | ||||
| def say_substitute(*args, **kwargs): | ||||
|     logger.info("Now sending message through say-substitute: {}".format(" - ".join(args))) | ||||
|     slack_client.chat_postMessage( | ||||
|         channel=config["archive_id"], | ||||
|         text=" - ".join(args), | ||||
|         **kwargs | ||||
|     ) | ||||
|      | ||||
|  | ||||
| def save_as_related_file(url, article_object): | ||||
|     r = requests.get(url, headers={"Authorization": "Bearer {}".format(slack_client.token)}) | ||||
|     saveto = article_object.save_path | ||||
|     ftype = url[url.rfind(".") + 1:] | ||||
|     fname = "{} - related no {}.{}".format( | ||||
|         article_object.file_name.replace(".pdf",""), | ||||
|         len(article_object.related) + 1, | ||||
|         ftype | ||||
|     ) | ||||
|     with open(os.path.join(saveto, fname), "wb") as f: | ||||
|         f.write(r.content) | ||||
|     article_object.set_related([fname]) | ||||
|     logger.info("Added {} to model {}".format(fname, article_object)) | ||||
|     return fname | ||||
|  | ||||
|  | ||||
| def react_file_path_message(fname, article_object): | ||||
|     saveto = article_object.save_path | ||||
|     file_path = os.path.join(saveto, fname) | ||||
|     if os.path.exists(file_path): | ||||
|         article_object.set_related([fname]) | ||||
|         logger.info("Added {} to model {}".format(fname, article_object)) | ||||
|         return True | ||||
|     else: | ||||
|         return False | ||||
|  | ||||
|  | ||||
| def is_message_in_archiving(message) -> bool: | ||||
|     if isinstance(message, dict): | ||||
|         return message["channel"] == config["archive_id"] | ||||
|     else: | ||||
|         return message.channel_id == config["archive_id"] | ||||
|  | ||||
|  | ||||
| def is_reaction_in_archiving(event) -> bool: | ||||
|     if isinstance(event, dict): | ||||
|         return event["item"]["channel"] == config["archive_id"] | ||||
|     else: | ||||
|         return event.message.channel_id == config["archive_id"] | ||||
| @@ -1,189 +0,0 @@ | ||||
| from slack_bolt import App | ||||
| from slack_bolt.adapter.socket_mode import SocketModeHandler | ||||
| from slack_sdk.errors import SlackApiError | ||||
|  | ||||
| import logging | ||||
| import configuration | ||||
|  | ||||
| from . import message_helpers | ||||
|  | ||||
|  | ||||
| config = configuration.parsed["SLACK"] | ||||
| models = configuration.models | ||||
|  | ||||
| class BotApp(App): | ||||
|     logger = logging.getLogger(__name__) | ||||
|  | ||||
|     def __init__(self, callback, *args, **kwargs): | ||||
|  | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.callback = callback | ||||
|  | ||||
|     def pre_start(self): | ||||
|         message_helpers.init(self.client) | ||||
|         missed_messages, missed_reactions = message_helpers.get_unhandled_messages() | ||||
|  | ||||
|         [self.handle_incoming_message(m) for m in missed_messages] | ||||
|         [self.handle_incoming_reaction(r) for r in missed_reactions] | ||||
|  | ||||
|         # self.react_missed_reactions(missed_reactions) | ||||
|         # self.react_missed_messages(missed_messages) | ||||
|         self.startup_status() | ||||
|  | ||||
|  | ||||
|  | ||||
|     def handle_incoming_reaction(self, reaction): | ||||
|         if isinstance(reaction, dict): #else: the reaction is already being passed as a model | ||||
|             # CAUTION: filter for 'changed reactions'; those are nasty (usually when adding a url) | ||||
|             reaction = message_helpers.reaction_dict_to_model(reaction) | ||||
|  | ||||
|         thread = reaction.message.thread | ||||
|         article_object = thread.article | ||||
|         if not article_object is None: | ||||
|             reaction = reaction.type | ||||
|             status = 1 if reaction == "white_check_mark" else -1 | ||||
|  | ||||
|             # self.logger.info(f"Applying reaction {reaction} to its root message.") | ||||
|             article_object.verified = status | ||||
|             article_object.save() | ||||
|  | ||||
|  | ||||
|     def handle_incoming_message(self, message): | ||||
|         """Reacts to all messages inside channel archiving. Must then | ||||
|         distinguish between threaded replies and new requests | ||||
|         and react accordingly""" | ||||
|         if isinstance(message, dict): #else: the message is already being passed as a model | ||||
|             # CAUTION: filter for 'changed messages'; those are nasty (usually when adding a url) | ||||
|             if message.get("subtype", "not bad") == "message_changed": | ||||
|                 return False | ||||
|             message = message_helpers.message_dict_to_model(message) | ||||
|  | ||||
|         # First check: belongs to thread? | ||||
|         is_threaded = message.thread.message_count > 1 and message != message.thread.initiator_message | ||||
|         if is_threaded: | ||||
|             self.incoming_thread_message(message) | ||||
|         else: | ||||
|             self.incoming_channel_message(message) | ||||
|              | ||||
|  | ||||
|     def incoming_thread_message(self, message): | ||||
|         if message.user.user_id == config["bot_id"]: | ||||
|             return True # ignore the files uploaded by the bot. We handled them already! | ||||
|          | ||||
|         thread = message.thread | ||||
|         if thread.is_fully_processed: | ||||
|             return True | ||||
|  | ||||
|         self.logger.info("Receiving thread-message") | ||||
|         self.respond_thread_message(message) | ||||
|  | ||||
|  | ||||
|     def incoming_channel_message(self, message): | ||||
|         self.logger.info(f"Handling message {message} ({len(message.urls)} urls)") | ||||
|  | ||||
|         if not message.urls: # no urls in a root-message => IGNORE | ||||
|             message.is_processed_override = True | ||||
|             message.save() | ||||
|             return | ||||
|  | ||||
|         # ensure thread is still empty, this is a scenario encountered only in testing, but let's just filter it | ||||
|         if message.thread.message_count > 1: | ||||
|             self.logger.info("Discarded message because it is actually processed.") | ||||
|             return | ||||
|          | ||||
|         if len(message.urls) > 1: | ||||
|             message_helpers.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.thread.slack_ts) | ||||
|          | ||||
|         self.callback(message) | ||||
|         # for url in message.urls: | ||||
|             # self.callback(url, message) | ||||
|             # stop here! | ||||
|  | ||||
|  | ||||
|  | ||||
|     def respond_thread_message(self, message, say=message_helpers.say_substitute): | ||||
|         thread = message.thread | ||||
|         article = thread.article | ||||
|         if message.perma_link: # file upload means new data     | ||||
|             fname = message_helpers.save_as_related_file(message.perma_link, article) | ||||
|             say("File was saved as 'related file' under `{}`.".format(fname), | ||||
|                 thread_ts=thread.slack_ts | ||||
|             ) | ||||
|         else: # either a pointer to a new file (too large to upload), or trash | ||||
|             success = message_helpers.react_file_path_message(message.text, article) | ||||
|             if success: | ||||
|                 say("File was saved as 'related file'", thread_ts=thread.slack_ts) | ||||
|             else: | ||||
|                 self.logger.error("User replied to thread {} but the response did not contain a file/path".format(thread)) | ||||
|                 say("Cannot process response without associated file.", | ||||
|                     thread_ts=thread.slack_ts | ||||
|                 ) | ||||
|  | ||||
|  | ||||
|     def respond_channel_message(self, thread, say=message_helpers.say_substitute): | ||||
|         article = thread.article | ||||
|         answers = article.slack_info | ||||
|         for a in answers: | ||||
|             if a["file_path"]: | ||||
|                 try: # the upload may result in an error | ||||
|                     self.client.files_upload( | ||||
|                         channels = config["archive_id"], | ||||
|                         initial_comment = f"<@{config['responsible_id']}> \n {a['reply_text']}", | ||||
|                         file = a["file_path"], | ||||
|                         thread_ts = thread.slack_ts | ||||
|                     ) | ||||
|                     status = True | ||||
|                 except SlackApiError as e: | ||||
|                     say( | ||||
|                         "File {} could not be uploaded.".format(a), | ||||
|                         thread_ts=thread.slack_ts | ||||
|                     ) | ||||
|                     status = False | ||||
|                     self.logger.error(f"File upload failed: {e}") | ||||
|             else: # anticipated that there is no file! | ||||
|                 say( | ||||
|                     f"<@{config['responsible_id']}> \n {a['reply_text']}", | ||||
|                     thread_ts=thread.slack_ts | ||||
|                 ) | ||||
|                 status = True | ||||
|          | ||||
|  | ||||
|     def startup_status(self): | ||||
|         threads = [t for t in models.Thread.select()] | ||||
|         all_threads = len(threads) | ||||
|         fully_processed = len([t for t in threads if t.is_fully_processed]) | ||||
|         fully_unprocessed = len([t for t in threads if t.message_count == 1]) | ||||
|         articles_unprocessed = len(models.ArticleDownload.select().where(models.ArticleDownload.verified < 1)) | ||||
|         self.logger.info(f"[bold]STATUS[/bold]: Fully processed {fully_processed}/{all_threads} threads. {fully_unprocessed} threads have 0 replies. Article-objects to verify: {articles_unprocessed}", extra={"markup": True}) | ||||
|  | ||||
|  | ||||
|      | ||||
|  | ||||
|  | ||||
| class BotRunner(): | ||||
|     """Stupid encapsulation so that we can apply the slack decorators to the BotApp""" | ||||
|     def __init__(self, callback, *args, **kwargs) -> None: | ||||
|         self.bot_worker = BotApp(callback, token=config["auth_token"]) | ||||
|  | ||||
|         @self.bot_worker.event(event="message", matchers=[message_helpers.is_message_in_archiving]) | ||||
|         def handle_incoming_message(message, say): | ||||
|             return self.bot_worker.handle_incoming_message(message) | ||||
|  | ||||
|         @self.bot_worker.event(event="reaction_added", matchers=[message_helpers.is_reaction_in_archiving]) | ||||
|         def handle_incoming_reaction(event, say): | ||||
|             return self.bot_worker.handle_incoming_reaction(event) | ||||
|  | ||||
|         self.handler = SocketModeHandler(self.bot_worker, config["app_token"]) | ||||
|  | ||||
|  | ||||
|     def start(self): | ||||
|         self.bot_worker.pre_start() | ||||
|         self.handler.start() | ||||
|  | ||||
|  | ||||
|     def stop(self): | ||||
|         self.handler.close() | ||||
|         print("Bye handler!") | ||||
|  | ||||
|     # def respond_to_message(self, message): | ||||
|     #     self.bot_worker.handle_incoming_message(message) | ||||
| @@ -1,331 +0,0 @@ | ||||
| import logging | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| from peewee import * | ||||
| import os | ||||
| import markdown | ||||
| import re | ||||
| import configuration | ||||
| import datetime | ||||
|  | ||||
| config = configuration.parsed["DOWNLOADS"] | ||||
| slack_config = configuration.parsed["SLACK"] | ||||
|  | ||||
| ## Helpers | ||||
| chat_db = DatabaseProxy() | ||||
| download_db = DatabaseProxy() | ||||
|  | ||||
| # set the nature of the db at runtime | ||||
|  | ||||
| class DownloadBaseModel(Model): | ||||
|     class Meta: | ||||
|         database = download_db | ||||
|  | ||||
| class ChatBaseModel(Model): | ||||
|     class Meta: | ||||
|         database = chat_db | ||||
|  | ||||
|  | ||||
|  | ||||
| ## == Article related models == ## | ||||
| class ArticleDownload(DownloadBaseModel): | ||||
|     title = CharField(default='') | ||||
|     pub_date = DateField(default = '') | ||||
|     download_date = DateField(default = datetime.date.today) | ||||
|     source_name = CharField(default = '') | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|     archive_url = TextField(default = '') | ||||
|     file_name = TextField(default = '') | ||||
|     language = CharField(default = '') | ||||
|     summary = TextField(default = '') | ||||
|     comment = TextField(default = '') | ||||
|     verified = IntegerField(default = False) | ||||
|     # authors | ||||
|     # keywords | ||||
|     # ... are added through foreignkeys | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         if self.title != '' and self.source_name != '': | ||||
|             desc = f"{shorten_name(self.title)} -- {self.source_name}" | ||||
|         else: | ||||
|             desc = f"{self.article_url}" | ||||
|         return f"ART [{desc}]" | ||||
|  | ||||
|     ## Useful Properties | ||||
|     @property | ||||
|     def save_path(self): | ||||
|         return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/" | ||||
|  | ||||
|     def fname_nas(self, file_name=""): | ||||
|         if self.download_date: | ||||
|             if file_name: | ||||
|                 return "NAS: {}/{}/{}/{}".format(config["remote_storage_path"], self.download_date.year, self.download_date.strftime("%B"), file_name) | ||||
|             else: # fall back to the file name stored on the model | ||||
|                 return "NAS: {}/{}/{}/{}".format(config["remote_storage_path"], self.download_date.year, self.download_date.strftime("%B"), self.file_name) | ||||
|         else: | ||||
|             return None | ||||
|  | ||||
|     @property | ||||
|     def fname_template(self): | ||||
|         if "youtube.com" in self.source_name or "youtu.be" in self.source_name: | ||||
|             fname = "{} -- {}".format(self.source_name, self.title) | ||||
|         else: | ||||
|             fname = "{} -- {}.pdf".format(self.source_name, self.title) | ||||
|         return clear_path_name(fname) | ||||
|  | ||||
|     @property | ||||
|     def is_title_bad(self):  # add incrementally | ||||
|         return "PUR-Abo" in self.title \ | ||||
|             or "Redirecting" in self.title \ | ||||
|             or "Error while running fetch" in self.title | ||||
|              | ||||
|     @property | ||||
|     def slack_info(self): | ||||
|         status = [":x: No better version available", ":gear: Verification pending", ":white_check_mark: Verified by human"][self.verified + 1] | ||||
|         content = "\n>" + "\n>".join(self.summary.split("\n")) | ||||
|         file_status, msg = self.file_status() | ||||
|         if not file_status: | ||||
|             return [msg] | ||||
|          | ||||
|         # everything alright: generate real content | ||||
|         # first the base file | ||||
|         if self.file_name[-4:] == ".pdf": | ||||
|             answer = [{ # main reply with the base pdf | ||||
|                 "reply_text" : f"*{self.title}*\n{status}\n{content}", | ||||
|                 "file_path" : self.save_path + self.file_name  | ||||
|             }] | ||||
|         else: # don't upload if the file is too big! | ||||
|             location = "Not uploaded to slack, but the file will be on the NAS:\n`{}`".format(self.fname_nas()) | ||||
|             answer = [{ # main reply with the base pdf | ||||
|                 "reply_text" : "*{}*\n{}\n{}\n{}".format(self.title, status, content, location), | ||||
|                 "file_path" : None  | ||||
|             }] | ||||
|  | ||||
|         # then the related files | ||||
|         rel_text = "" | ||||
|         for r in self.related: | ||||
|             fname = r.related_file_name | ||||
|             lentry = "\n• `{}` ".format(self.fname_nas(fname)) | ||||
|             if fname[-4:] == ".pdf": # this is a manageable file, directly upload | ||||
|                 f_ret = self.save_path + fname | ||||
|                 answer.append({"reply_text":"", "file_path" : f_ret}) | ||||
|             else: # not pdf <=> too large. Don't upload but mention its existence | ||||
|                 lentry += "(not uploaded to slack, but the file will be on the NAS)" | ||||
|                  | ||||
|             rel_text += lentry | ||||
|  | ||||
|         if rel_text: | ||||
|             rel_text = answer[0]["reply_text"] = answer[0]["reply_text"] + "\nRelated files:\n" + rel_text | ||||
|          | ||||
|         return answer | ||||
|  | ||||
|     @property | ||||
|     def mail_info(self): | ||||
|         base = [{"reply_text": "[{}]({})\n".format(self.article_url, self.article_url), "file_path":None}] + self.slack_info | ||||
|         return [{"reply_text": markdown.markdown(m["reply_text"]), "file_path": m["file_path"]} for m in base] | ||||
|  | ||||
|  | ||||
|     ## Helpers | ||||
|     def set_keywords(self, keywords): | ||||
|         for k in keywords: | ||||
|             ArticleKeyword.create( | ||||
|                 article = self, | ||||
|                 keyword = k | ||||
|                 ) | ||||
|  | ||||
|     def set_authors(self, authors): | ||||
|         for a in authors: | ||||
|             ArticleAuthor.create( | ||||
|                 article = self, | ||||
|                 author = a | ||||
|                 ) | ||||
|  | ||||
|     def set_references(self, references): | ||||
|         for r in references: | ||||
|             ArticleReference.create( | ||||
|                 article = self, | ||||
|                 reference_url = r | ||||
|             ) | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
|             ) | ||||
|  | ||||
|     def file_status(self): | ||||
|         if not self.file_name: | ||||
|             logger.error("Article {} has no filename!".format(self)) | ||||
|             return False, {"reply_text": "Download failed, no file was saved.", "file_path": None} | ||||
|          | ||||
|         file_path_abs = self.save_path + self.file_name | ||||
|         if not os.path.exists(file_path_abs): | ||||
|             logger.error("Article {} has a filename, but the file does not exist at that location!".format(self)) | ||||
|             return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None} | ||||
|  | ||||
|         return True, {} | ||||
|  | ||||
|  | ||||
| class ArticleKeyword(DownloadBaseModel): | ||||
|     # one instance is created per keyword -> flexible in size | ||||
|     article = ForeignKeyField(ArticleDownload, backref='keywords') | ||||
|     keyword = CharField() | ||||
|  | ||||
|  | ||||
| class ArticleAuthor(DownloadBaseModel): | ||||
|     article = ForeignKeyField(ArticleDownload, backref='authors') | ||||
|     author = CharField() | ||||
|  | ||||
|  | ||||
| class ArticleReference(DownloadBaseModel): | ||||
|     article = ForeignKeyField(ArticleDownload, backref='references') | ||||
|     reference_url = TextField(default = '') | ||||
|  | ||||
|  | ||||
| class ArticleRelated(DownloadBaseModel): | ||||
|     article = ForeignKeyField(ArticleDownload, backref='related') | ||||
|     related_file_name = TextField(default = '') | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| ## == Slack-thread related models == ## | ||||
| class User(ChatBaseModel): | ||||
|     user_id = CharField(default='', unique=True) | ||||
|     # messages | ||||
|  | ||||
|  | ||||
| class Thread(ChatBaseModel): | ||||
|     """The threads that concern us are only created if the base massage contains a url""" | ||||
|     thread_ts = FloatField(default = 0) | ||||
|     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) | ||||
|     # provides, ts, user, models | ||||
|     # messages | ||||
|  | ||||
|     @property | ||||
|     def slack_ts(self): | ||||
|         str_ts = str(self.thread_ts) | ||||
|         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # Slack expects 6 decimals; pad with zeros if the float has fewer | ||||
|         return "{}{}".format(str_ts, cut_zeros*"0") | ||||
|  | ||||
|     @property | ||||
|     def initiator_message(self): | ||||
|         try: | ||||
|             return self.messages[0] # TODO check if this needs sorting | ||||
|         except IndexError: | ||||
|             logger.warning(f"Thread {self} is empty. How can that be?") | ||||
|             return None | ||||
|  | ||||
|     @property | ||||
|     def message_count(self): | ||||
|         # logger.warning("message_count was called") | ||||
|         return self.messages.count() | ||||
|  | ||||
|     @property | ||||
|     def last_message(self): | ||||
|         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation | ||||
|         return messages[-1] | ||||
|  | ||||
|     @property | ||||
|     def is_fully_processed(self) -> bool: | ||||
|         init_message = self.initiator_message | ||||
|         if init_message is None: | ||||
|             return False | ||||
|          | ||||
|         if init_message.is_processed_override: | ||||
|             return True | ||||
|         # this override is set for instance, when no url was sent at all. Then set this thread to be ignored | ||||
|          | ||||
|         reactions = init_message.reaction | ||||
|         if not reactions: | ||||
|             return False | ||||
|         else: | ||||
|             r = reactions[0].type # can and should only have one reaction | ||||
|             return r == "white_check_mark" \ | ||||
|                 or r == "x" | ||||
|  | ||||
|  | ||||
|      | ||||
| class Message(ChatBaseModel): | ||||
|     ts = FloatField(unique=True) #for sorting | ||||
|     channel_id = CharField(default='') | ||||
|     user = ForeignKeyField(User, backref="messages") | ||||
|     text = TextField(default='') | ||||
|     thread = ForeignKeyField(Thread, backref="messages", default=None) | ||||
|     file_type = CharField(default='') | ||||
|     perma_link = CharField(default='') | ||||
|     is_processed_override = BooleanField(default=False) | ||||
|     # reaction | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) | ||||
|  | ||||
|     @property | ||||
|     def slack_ts(self): | ||||
|         str_ts = str(self.ts) | ||||
|         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # Slack expects 6 decimals; pad with zeros if the float has fewer | ||||
|         return "{}{}".format(str_ts, cut_zeros * "0") | ||||
|  | ||||
|  | ||||
|     @property | ||||
|     def urls(self): | ||||
|         pattern = r"<(.*?)>" | ||||
|         matches = re.findall(pattern, self.text) | ||||
|         matches = [m for m in matches if "." in m] | ||||
|          | ||||
|         new_matches = [] | ||||
|         for m in matches: | ||||
|             if "." in m:  # must contain a tld, right? | ||||
|                 # further complication: slack automatically abbreviates urls in the format: | ||||
|                 # <url|link preview>. Lucky for us, "|" is a character discouraged in urls, meaning we can "safely" split on it and retain the first half | ||||
|                 if "|" in m: | ||||
|                     keep = m.split("|")[0] | ||||
|                 else: | ||||
|                     keep = m | ||||
|                 new_matches.append(keep) | ||||
|         return new_matches | ||||
|      | ||||
|     @property | ||||
|     def is_by_human(self): | ||||
|         return self.user.user_id != slack_config["bot_id"] | ||||
|  | ||||
|      | ||||
|     @property | ||||
|     def has_single_url(self): | ||||
|         return len(self.urls) == 1 | ||||
|  | ||||
|  | ||||
| class Reaction(ChatBaseModel): | ||||
|     type = CharField(default = "") | ||||
|     message = ForeignKeyField(Message, backref="reaction") | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def create_tables(): | ||||
|     with download_db: | ||||
|         download_db.create_tables([ArticleDownload, ArticleKeyword, ArticleAuthor, ArticleReference, ArticleRelated]) | ||||
|     with chat_db: | ||||
|         chat_db.create_tables([User, Message, Thread, Reaction]) | ||||
|  | ||||
|  | ||||
| def set_db(chat_db_object, download_db_object): | ||||
|     chat_db.initialize(chat_db_object) | ||||
|     download_db.initialize(download_db_object) | ||||
|     create_tables() | ||||
|  | ||||
| def clear_path_name(path): | ||||
|     keepcharacters = (' ','.','_', '-') | ||||
|     converted = "".join([c if (c.isalnum() or c in keepcharacters) else "_" for c in path]).rstrip() | ||||
|     return converted | ||||
|  | ||||
| def shorten_name(name, offset = 50): | ||||
|     if len(name) > offset: | ||||
|         return name[:offset] + "..." | ||||
|     else: | ||||
|         return name | ||||
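The twin slack_ts properties exist because Slack addresses messages by a string timestamp with exactly six decimal places, while the database stores a float that silently drops trailing zeros. The padding logic, traced by hand:

    str_ts = str(1661166000.98)                             # "1661166000.98": the float lost its trailing zeros
    cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1)    # 6 - 2 = 4 missing decimals
    print("{}{}".format(str_ts, cut_zeros * "0"))           # "1661166000.980000"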
							
								
								
									
news_fetch/configuration.py: 66 changes (new file)
| @@ -0,0 +1,66 @@ | ||||
| import os | ||||
| import shutil | ||||
| import configparser | ||||
| import logging | ||||
| from datetime import datetime | ||||
| from peewee import SqliteDatabase, PostgresqlDatabase | ||||
| from rich.logging import RichHandler | ||||
|  | ||||
|  | ||||
| # first things first: logging | ||||
| logging.basicConfig( | ||||
|     format='%(message)s', | ||||
|     level=logging.INFO, | ||||
|     datefmt='%H:%M:%S', # add %Y-%m-%d if needed | ||||
|     handlers=[RichHandler()] | ||||
|     ) | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| # load config file containing constants and secrets | ||||
| main_config = configparser.ConfigParser() | ||||
| main_config.read("/app/containerdata/config/news_fetch.config.ini") | ||||
| db_config = configparser.ConfigParser() | ||||
| db_config.read("/app/containerdata/config/db.config.ini") | ||||
|  | ||||
|  | ||||
| # DEBUG MODE: | ||||
| if os.getenv("DEBUG", "false") == "true": | ||||
|     logger.warning("Found 'DEBUG=true', setting up dummy databases") | ||||
|      | ||||
|     main_config["SLACK"]["archive_id"] = main_config["SLACK"]["debug_id"] | ||||
|     main_config["MAIL"]["recipient"] = main_config["MAIL"]["sender"] | ||||
|     main_config["DOWNLOADS"]["local_storage_path"] = main_config["DOWNLOADS"]["debug_storage_path"] | ||||
|  | ||||
|     download_db = SqliteDatabase( | ||||
|         main_config["DATABASE"]["download_db_debug"], | ||||
|         pragmas = {'journal_mode': 'wal'} # multiple threads can read at once | ||||
|     ) | ||||
|  | ||||
| # PRODUCTION MODE: | ||||
| else: | ||||
|     logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...") | ||||
|      | ||||
|     cred = db_config["DATABASE"] | ||||
|     download_db = PostgresqlDatabase( | ||||
|         cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432 | ||||
|     ) | ||||
|     # TODO Reimplement backup/printout | ||||
|     # logger.info("Backing up databases") | ||||
|     # backup_dst = main_config["DATABASE"]["db_backup"] | ||||
|     # today = datetime.today().strftime("%Y.%m.%d") | ||||
|     # shutil.copyfile( | ||||
|     #     os.path.join(db_base_path, main_config["DATABASE"]["chat_db_name"]),  | ||||
|     #     os.path.join(backup_dst, today + "." + main_config["DATABASE"]["chat_db_name"]),  | ||||
|     #     ) | ||||
|     # shutil.copyfile( | ||||
|     #     os.path.join(db_base_path, main_config["DATABASE"]["download_db_name"]),  | ||||
|     #     os.path.join(backup_dst, today + "." + main_config["DATABASE"]["download_db_name"]),  | ||||
|     #     ) | ||||
|  | ||||
|  | ||||
|  | ||||
| from utils_storage import models | ||||
|  | ||||
| # Set up the database | ||||
| models.set_db(download_db) | ||||
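For illustration, the mode switch above reduces to a single environment variable; a minimal standalone sketch, with all names and paths hypothetical:

```python
import os
from peewee import SqliteDatabase, PostgresqlDatabase

if os.getenv("DEBUG", "false") == "true":
    # WAL journal mode lets multiple threads read while one writes
    db = SqliteDatabase("/tmp/debug.db", pragmas={"journal_mode": "wal"})
else:
    # "vpn" resolves to the openconnect container, which exposes 5432 for the socat passthrough
    db = PostgresqlDatabase("news", user="fetcher", password="secret",
                            host="vpn", port=5432)
```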
| @@ -3,7 +3,6 @@ import configuration | ||||
| models = configuration.models | ||||
| from threading import Thread | ||||
| import logging | ||||
| import os | ||||
| import sys | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| @@ -14,10 +13,9 @@ from utils_worker.workers import CompressWorker, DownloadWorker, FetchWorker, Up | ||||
| 
 | ||||
| class ArticleWatcher: | ||||
|     """Wrapper for a newly created article object. Notifies the coordinator upon change/completition""" | ||||
|     def __init__(self, article, thread, **kwargs) -> None: | ||||
|     def __init__(self, article, **kwargs) -> None: | ||||
|         self.article_id = article.id # in case article becomes None at any point, we can still track the article | ||||
|         self.article = article | ||||
|         self.thread = thread | ||||
| 
 | ||||
|         self.completition_notifier = kwargs.get("notifier") | ||||
|         self.fetch = kwargs.get("worker_fetch", None) | ||||
| @@ -50,7 +48,7 @@ class ArticleWatcher: | ||||
|         elif completed_action == "download": | ||||
|             self.compress.process(self) | ||||
|         elif completed_action == "compress": # last step | ||||
|             self.completition_notifier(self.article, self.thread) | ||||
|             self.completition_notifier(self.article) | ||||
|             # triggers action in Coordinator | ||||
|         elif completed_action == "upload": | ||||
|             # this case occurs when upload was faster than compression | ||||
| @@ -118,17 +116,34 @@ class Coordinator(Thread): | ||||
| 
 | ||||
|     def launch(self) -> None: | ||||
|         for w in [self.worker_download, self.worker_fetch, self.worker_upload, self.worker_compress]: | ||||
|             if not w is None: | ||||
|             if not w is None: # for reduced operations such as upload, some workers are set to None | ||||
|                 w.start() | ||||
| 
 | ||||
|         # if past messages have not been sent, they must be reevaluated | ||||
|         unsent = models.ArticleDownload.filter(sent = False) | ||||
|         for a in unsent: | ||||
|             logger.info(f"Reevaluating unsent article {a}") | ||||
|             self.incoming_request(article=a) | ||||
| 
 | ||||
|     def incoming_request(self, message): | ||||
|         """This method is passed onto the slack worker. It gets triggered when a new message is received.""" | ||||
| 
 | ||||
|     def incoming_request(self, message=None, article=None): | ||||
|         """This method is passed onto the slack worker. It then is called when a new message is received.""" | ||||
|          | ||||
|         if message is not None: | ||||
|             try: | ||||
|                 url = message.urls[0] # ignore all the other ones | ||||
|             except IndexError: | ||||
|                 return | ||||
|             article, is_new = models.ArticleDownload.get_or_create(article_url=url) | ||||
|         thread = message.thread | ||||
|         thread.article = article | ||||
|         thread.save() | ||||
|             article.slack_ts = message.ts # either update the timestamp (to the last reference to the article) or set it for the first time | ||||
|         elif article is not None: | ||||
|             is_new = False | ||||
|             logger.info(f"Received article {article} in incoming_request") | ||||
|         else: | ||||
|             logger.error("Coordinator.incoming_request called with no arguments") | ||||
|             return | ||||
| 
 | ||||
|         self.kwargs.update({"notifier" : self.article_complete_notifier}) | ||||
| 
 | ||||
|         if is_new or (article.file_name == "" and article.verified == 0): | ||||
| @@ -136,7 +151,6 @@ class Coordinator(Thread): | ||||
|             # this overwrites previously set information, but that should not be too important | ||||
|             ArticleWatcher( | ||||
|                 article, | ||||
|                 thread, | ||||
|                 **self.kwargs    | ||||
|             ) | ||||
| 
 | ||||
| @@ -146,7 +160,7 @@ class Coordinator(Thread): | ||||
|             # the watcher will notify once it is sufficiently populated | ||||
|         else: # manually trigger notification immediately | ||||
|             logger.info(f"Found existing article {article}. Now sending") | ||||
|             self.article_complete_notifier(article, thread) | ||||
|             self.article_complete_notifier(article) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @@ -155,34 +169,33 @@ class Coordinator(Thread): | ||||
|             w.start() | ||||
| 
 | ||||
|         for article in articles: | ||||
|             notifier = lambda article: print(f"Completed manual actions for {article}") | ||||
|             notifier = lambda article: logger.info(f"Completed manual actions for {article}") | ||||
|             ArticleWatcher(article, workers_manual = workers, notifier = notifier) # the watcher no longer takes a thread argument (removed above) | ||||
| 
 | ||||
|     def article_complete_notifier(self, article, thread): | ||||
|     def article_complete_notifier(self, article): | ||||
|         if self.worker_slack is None: | ||||
|             logger.warning("Not sending slack notifier") | ||||
|             logger.warning("Skipping slack notification because worker is None") | ||||
|         else: | ||||
|             self.worker_slack.bot_worker.respond_channel_message(thread) | ||||
|             self.worker_slack.bot_worker.respond_channel_message(article) | ||||
|         if self.worker_mail is None: | ||||
|             logger.warning("Not sending mail notifier") | ||||
|             logger.warning("Skipping mail notification because worker is None") | ||||
|         else: | ||||
|             self.worker_mail.send(article) | ||||
|          | ||||
|         article.sent = True | ||||
|         article.save() | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     coordinator = Coordinator() | ||||
| 
 | ||||
| 
 | ||||
|     if os.getenv("UPLOAD", "false") == "true": | ||||
|     if "upload" in sys.argv: | ||||
|         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute() | ||||
|         logger.info(f"Launching upload to archive for {len(articles)} articles.") | ||||
|         coordinator.manual_processing(articles, [UploadWorker()]) | ||||
| 
 | ||||
|     elif os.getenv("CHECK", "false") == "true": | ||||
|         from utils_check import runner as check_runner | ||||
|         check_runner.verify_unchecked() | ||||
| 
 | ||||
|     else: # launch with full action | ||||
|         slack_runner = slack_runner.BotRunner(coordinator.incoming_request) | ||||
|         kwargs = { | ||||
| @@ -196,10 +209,10 @@ if __name__ == "__main__": | ||||
|         try: | ||||
|             coordinator.add_workers(**kwargs) | ||||
|             coordinator.start() | ||||
|             slack_runner.start() | ||||
|             slack_runner.start() # last one to start, inside the main thread | ||||
|         except KeyboardInterrupt: | ||||
|             logger.info("Keyboard interrupt. Stopping Slack and Coordinator") | ||||
|             slack_runner.stop() | ||||
|             print("BYE!") | ||||
|             logger.info("BYE!") | ||||
|             # coordinator was set as a daemon thread, so it will be stopped automatically | ||||
|             sys.exit(0) | ||||
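The shutdown path relies on the Coordinator being a daemon thread: once the SocketModeHandler is stopped, the main thread exits and daemon threads die with the process. A minimal sketch of that pattern (the class body is hypothetical):

```python
import sys
import threading
import time

class Coordinator(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)  # daemon threads die when the main thread exits

    def run(self):
        while True:  # stand-in for the real dispatch loop
            time.sleep(1)

coordinator = Coordinator()
coordinator.start()
try:
    while True:  # stand-in for slack_runner.start(), which blocks the main thread
        time.sleep(1)
except KeyboardInterrupt:
    sys.exit(0)  # no join() needed for daemon threads
```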
| @@ -23,7 +23,7 @@ u_options = { | ||||
| 
 | ||||
| 
 | ||||
| bot_client = WebClient( | ||||
|     token = configuration.parsed["SLACK"]["auth_token"] | ||||
|     token = configuration.main_config["SLACK"]["auth_token"] | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| @@ -70,7 +70,7 @@ def send_reaction_to_slack_thread(article, reaction): | ||||
|         else: | ||||
|             ts = m.slack_ts | ||||
|             bot_client.reactions_add( | ||||
|                 channel=configuration.parsed["SLACK"]["archive_id"], | ||||
|                 channel=configuration.main_config["SLACK"]["archive_id"], | ||||
|                 name=reaction, | ||||
|                 timestamp=ts | ||||
|             ) | ||||
| @@ -7,7 +7,7 @@ import logging | ||||
| import configuration | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| config = configuration.parsed["MAIL"] | ||||
| config = configuration.main_config["MAIL"] | ||||
| 
 | ||||
| def send(article_model): | ||||
|     mail = MIMEMultipart() | ||||
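A sketch of how such a send function might assemble the message from the article's mail_info, assuming SMTP details that this diff does not show (relay host and addresses are hypothetical):

```python
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

def send(article_model, sender="bot@example.com", recipient="archive@example.com"):
    mail = MIMEMultipart()
    mail["Subject"] = str(article_model)
    mail["From"], mail["To"] = sender, recipient
    for part in article_model.mail_info:  # list of {"reply_text": html, "file_path": ...}
        mail.attach(MIMEText(part["reply_text"], "html"))
    with smtplib.SMTP("localhost") as smtp:  # hypothetical relay
        smtp.send_message(mail)
```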
news_fetch/utils_slack/runner.py (new file, 238 lines)
							| @@ -0,0 +1,238 @@ | ||||
| from slack_bolt import App | ||||
| from slack_bolt.adapter.socket_mode import SocketModeHandler | ||||
| from slack_sdk.errors import SlackApiError | ||||
|  | ||||
| import logging | ||||
| import re | ||||
| import time | ||||
|  | ||||
| import configuration | ||||
| config = configuration.main_config["SLACK"] | ||||
| models = configuration.models | ||||
|  | ||||
| class MessageIsUnwanted(Exception): | ||||
|     # Raised when the message is either threaded (a reply to another message) or of an unwanted subtype (an edit, a deletion, etc.) | ||||
|     pass | ||||
|  | ||||
| class Message: | ||||
|     ts: str | ||||
|     user_id: str | ||||
|     text: str | ||||
|     logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
|     def __init__(self, message_dict): | ||||
|         if message_dict.get("subtype", "") == "message_changed": # edited messages are unwanted | ||||
|             raise MessageIsUnwanted() | ||||
|         if message_dict["type"] == "message": | ||||
|             if "thread_ts" in message_dict and (message_dict["thread_ts"] != message_dict["ts"]): # meaning it's a reply to another message | ||||
|                 raise MessageIsUnwanted() | ||||
|              | ||||
|             self.user_id = message_dict.get("user", "BAD USER") | ||||
|             # self.channel_id = config["archive_id"] # by construction, other messages are not intercepted | ||||
|             self.ts = message_dict["ts"] | ||||
|             self.text = message_dict["text"] | ||||
|  | ||||
|         else: | ||||
|             self.logger.warning(f"What should I do of {message_dict}") | ||||
|             raise MessageIsUnwanted() | ||||
|  | ||||
|  | ||||
|     def __str__(self) -> str: | ||||
|         return f"MSG [{self.text}]" | ||||
|  | ||||
|  | ||||
|     @property | ||||
|     def urls(self): | ||||
|         pattern = r"<(.*?)>" | ||||
|         matches = re.findall(pattern, self.text) | ||||
|         matches = [m for m in matches if "." in m] # must contain a tld, right? | ||||
|          | ||||
|         new_matches = [] | ||||
|         for m in matches: | ||||
|             # further complication: slack automatically abbreviates urls in the format:  | ||||
|             # <url|link preview>. Lucky for us, "|" is a character discouraged in urls, meaning we can "safely" split on it and retain the first half | ||||
|             if "|" in m: | ||||
|                 keep = m.split("|")[0] | ||||
|             else: | ||||
|                 keep = m | ||||
|             new_matches.append(keep) | ||||
|         return new_matches | ||||
|  | ||||
|     @property | ||||
|     def is_by_human(self): | ||||
|         return self.user_id != config["bot_id"] # the Message class stores user_id directly | ||||
|  | ||||
|      | ||||
|     @property | ||||
|     def has_single_url(self): | ||||
|         return len(self.urls) == 1 | ||||
|  | ||||
|  | ||||
|  | ||||
| class BotApp(App): | ||||
|     logger = logging.getLogger(__name__) | ||||
|  | ||||
|     def __init__(self, callback, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.callback = callback | ||||
|  | ||||
|  | ||||
|     def pre_start(self): | ||||
|         missed_messages = self.fetch_missed_channel_messages() | ||||
|  | ||||
|         for m in missed_messages: | ||||
|             self.handle_incoming_message(m) | ||||
|         self.startup_status() | ||||
|  | ||||
|  | ||||
|     def say_substitute(self, *args, **kwargs): | ||||
|         self.client.chat_postMessage( | ||||
|             channel=config["archive_id"], | ||||
|             text=" - ".join(args), | ||||
|             **kwargs | ||||
|         ) | ||||
|  | ||||
|     def fetch_missed_channel_messages(self): | ||||
|         # latest processed message_ts is: | ||||
|         presaved = models.ArticleDownload.select().order_by(models.ArticleDownload.slack_ts.desc()).get_or_none() | ||||
|         if presaved is None: | ||||
|             last_ts = 0 | ||||
|         else: | ||||
|             last_ts = presaved.slack_ts_full | ||||
|  | ||||
|         result = self.client.conversations_history( | ||||
|             channel=config["archive_id"], | ||||
|             oldest=last_ts | ||||
|         ) | ||||
|  | ||||
|         new_messages = result.get("messages", []) | ||||
|         # # filter the last one, it is a duplicate! (only if the db is not empty!) | ||||
|         # if last_ts != 0 and len(new_messages) != 0: | ||||
|         #     new_messages.pop(-1) | ||||
|  | ||||
|         return_messages = [Message(m) for m in new_messages] | ||||
|  | ||||
|         refetch = result.get("has_more", False) | ||||
|         while refetch: # we have not actually fetched them all | ||||
|             try: | ||||
|                 result = self.client.conversations_history( | ||||
|                     channel = config["archive_id"], | ||||
|                     cursor = result["response_metadata"]["next_cursor"], | ||||
|                     oldest = last_ts | ||||
|                 ) # fetches the next page (up to 100 messages), continuing from the pagination cursor | ||||
|                 refetch = result.get("has_more", False) | ||||
|  | ||||
|                 new_messages = result.get("messages", []) | ||||
|                 for m in new_messages: | ||||
|                     return_messages.append(Message(m)) | ||||
|             except SlackApiError: # Most likely a rate-limit | ||||
|                 self.logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"])) | ||||
|                 time.sleep(config["api_wait_time"]) | ||||
|                 refetch = True | ||||
|          | ||||
|         self.logger.info(f"Fetched {len(return_messages)} new channel messages.") | ||||
|         return return_messages | ||||
|  | ||||
|  | ||||
|  | ||||
|     def handle_incoming_message(self, message, say=None): | ||||
|         """Reacts to all messages inside channel archiving. This either gets called when catching up on missed messages (by pre_start()) or by the SocketModeHandler in 'live' mode""" | ||||
|         if isinstance(message, dict):  | ||||
|             try: | ||||
|                 message = Message(message) | ||||
|             except MessageIsUnwanted: | ||||
|                 return False | ||||
|  | ||||
|          | ||||
|         self.logger.info(f"Handling message {message} ({len(message.urls)} urls)") | ||||
|  | ||||
|          | ||||
|         if len(message.urls) > 1: | ||||
|             self.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.thread.slack_ts) | ||||
|          | ||||
|         self.callback(message = message) | ||||
|  | ||||
|  | ||||
|     def respond_channel_message(self, article, say=None): | ||||
|         if say is None: | ||||
|             say = self.say_substitute | ||||
|         answers = article.slack_info | ||||
|         for a in answers: | ||||
|             if a["file_path"]: | ||||
|                 try: | ||||
|                     self.client.files_upload( | ||||
|                         channels = config["archive_id"], | ||||
|                         initial_comment = f"{a['reply_text']}", | ||||
|                         file = a["file_path"], | ||||
|                         thread_ts = article.slack_ts_full | ||||
|                     ) | ||||
|                     status = True | ||||
|                 except SlackApiError as e: # upload resulted in an error | ||||
|                     say( | ||||
|                         "File {} could not be uploaded.".format(a), | ||||
|                         thread_ts = article.slack_ts_full | ||||
|                     ) | ||||
|                     status = False | ||||
|                     self.logger.error(f"File upload failed: {e}") | ||||
|             else: # anticipated that there is no file! | ||||
|                 say( | ||||
|                     f"{a['reply_text']}", | ||||
|                     thread_ts = article.slack_ts_full | ||||
|                 ) | ||||
|                 status = True | ||||
|          | ||||
|  | ||||
|     def startup_status(self): | ||||
|         """Prints an overview of the articles. This needs to be called here because it should run after having fetched the newly sent messages""" | ||||
|         total = models.ArticleDownload.select().count() | ||||
|         to_be_processed = models.ArticleDownload.select().where(models.ArticleDownload.title == "").count() | ||||
|         unchecked = models.ArticleDownload.select().where(models.ArticleDownload.verified == 0).count() | ||||
|         bad = models.ArticleDownload.select().where(models.ArticleDownload.verified == -1).count() | ||||
|         not_uploaded = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").count() | ||||
|         self.logger.info( | ||||
|             f"[bold]NEWS-FETCH DATABASE STATUS[/bold]: Total entries: {total}; Not yet downloaded: {to_be_processed}; Not yet checked: {unchecked}; Not yet uploaded to archive: {not_uploaded}; Marked as bad: {bad}", | ||||
|             extra={"markup": True} | ||||
|         ) | ||||
|  | ||||
|  | ||||
|      | ||||
|  | ||||
|  | ||||
| class BotRunner(): | ||||
|     """Stupid encapsulation so that we can apply the slack decorators to the BotApp""" | ||||
|     logger = logging.getLogger(__name__) | ||||
|  | ||||
|     def __init__(self, callback, *args, **kwargs) -> None: | ||||
|         self.bot_worker = BotApp(callback, token=config["auth_token"]) | ||||
|  | ||||
|         @self.bot_worker.event(event="message", matchers=[is_message_in_archiving]) | ||||
|         def handle_incoming_message(message, say): | ||||
|             return self.bot_worker.handle_incoming_message(message, say) | ||||
|  | ||||
|         # @self.bot_worker.event(event="reaction_added", matchers=[is_reaction_in_archiving]) | ||||
|         # def handle_incoming_reaction(event, say): | ||||
|         #     return self.bot_worker.handle_incoming_reaction(event) | ||||
|  | ||||
|         @self.bot_worker.event(event="event") | ||||
|         def handle_all_other_reactions(event, say): | ||||
|             self.logger.info("Ignoring slack event that isn't a message") # .log() would require an explicit level | ||||
|  | ||||
|         self.handler = SocketModeHandler(self.bot_worker, config["app_token"]) | ||||
|  | ||||
|  | ||||
|     def start(self): | ||||
|         self.bot_worker.pre_start() | ||||
|         self.handler.start() | ||||
|  | ||||
|  | ||||
|     def stop(self): | ||||
|         self.handler.close() | ||||
|         self.logger.info("Closed Slack-Socketmodehandler") | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def is_message_in_archiving(message) -> bool: | ||||
|     return message["channel"] == config["archive_id"] | ||||
|  | ||||
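The message catch-up above is cursor-based pagination with a blunt retry on rate limits; the same loop in isolation (wait time and channel are placeholders):

```python
import time
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

def fetch_all(client: WebClient, channel: str, oldest: str, wait: int = 30):
    result = client.conversations_history(channel=channel, oldest=oldest)
    messages = result.get("messages", [])
    while result.get("has_more", False):
        try:
            result = client.conversations_history(
                channel=channel,
                cursor=result["response_metadata"]["next_cursor"],
                oldest=oldest,
            )
            messages += result.get("messages", [])
        except SlackApiError:   # most likely a rate limit
            time.sleep(wait)    # back off and retry with the same cursor
    return messages
```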
news_fetch/utils_storage/helpers.py (new file, 10 lines)
							| @@ -0,0 +1,10 @@ | ||||
| def clear_path_name(path): | ||||
|     keepcharacters = (' ','.','_', '-') | ||||
|     converted = "".join([c if (c.isalnum() or c in keepcharacters) else "_" for c in path]).rstrip() | ||||
|     return converted | ||||
|  | ||||
| def shorten_name(name, offset = 50): | ||||
|     if len(name) > offset: | ||||
|         return name[:offset] + "..." | ||||
|     else: | ||||
|         return name | ||||
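Both helpers are pure string transformations; for illustration, the expected behaviour (assuming the module is importable as utils_storage.helpers):

```python
from utils_storage.helpers import clear_path_name, shorten_name

clear_path_name("NZZ: Bericht 2022/23?")  # -> "NZZ_ Bericht 2022_23_"
shorten_name("A" * 60)                    # -> 50 "A"s followed by "..."
```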
news_fetch/utils_storage/models.py (new file, 297 lines)
							| @@ -0,0 +1,297 @@ | ||||
| import logging | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| from peewee import * | ||||
| import os | ||||
| import markdown | ||||
| import re | ||||
| import configuration | ||||
| import datetime | ||||
|  | ||||
| from . import helpers | ||||
| config = configuration.main_config["DOWNLOADS"] | ||||
| slack_config = configuration.main_config["SLACK"] | ||||
|  | ||||
| # set the nature of the db at runtime | ||||
| download_db = DatabaseProxy() | ||||
|  | ||||
|  | ||||
| class DownloadBaseModel(Model): | ||||
|     class Meta: | ||||
|         database = download_db | ||||
|  | ||||
|  | ||||
|  | ||||
| ## == Article related models == ## | ||||
| class ArticleDownload(DownloadBaseModel): | ||||
|     # in the beginning this is all we have | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|      | ||||
|     # fetch then fills in the metadata | ||||
|     title = CharField(default='') | ||||
|     @property | ||||
|     def is_title_bad(self):  # add incrementally | ||||
|         return "PUR-Abo" in self.title \ | ||||
|             or "Redirecting" in self.title \ | ||||
|             or "Error while running fetch" in self.title | ||||
|  | ||||
|     summary = TextField(default = '') | ||||
|     source_name = CharField(default = '') | ||||
|     language = CharField(default = '') | ||||
|  | ||||
|  | ||||
|     file_name = TextField(default = '') | ||||
|     @property | ||||
|     def save_path(self): | ||||
|         return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/" | ||||
|     def fname_nas(self, file_name=""): # not a @property: it optionally takes a file name | ||||
|         if self.download_date: | ||||
|             if file_name: | ||||
|                 return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{file_name}" | ||||
|             else: # fall back to self.file_name | ||||
|                 return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{self.file_name}" | ||||
|         else: | ||||
|             return None | ||||
|     @property | ||||
|     def fname_template(self): | ||||
|         if "youtube.com" in self.source_name or "youtu.be" in self.source_name: | ||||
|             fname = f"{self.source_name} -- {self.title}" | ||||
|         else: | ||||
|             fname = f"{self.source_name} -- {self.title}.pdf" | ||||
|         return helpers.clear_path_name(fname) | ||||
|  | ||||
|      | ||||
|     archive_url = TextField(default = '') | ||||
|     pub_date = DateField(default = '') | ||||
|     download_date = DateField(default = datetime.date.today) | ||||
|  | ||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||
|     @property | ||||
|     def slack_ts_full(self): | ||||
|         str_ts = str(self.slack_ts) | ||||
|         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # slack expects 6 decimals; str() drops trailing zeros (e.g. 1234.5 -> "1234.500000") | ||||
|         return f"{str_ts}{cut_zeros * '0'}" | ||||
|  | ||||
|     sent = BooleanField(default = False) | ||||
|      | ||||
|     archived_by = CharField(default = os.getenv("UNAME")) | ||||
|     # need to know who saved the message because the file needs to be on their computer in order to get verified | ||||
|     # verification happens in a different app, but the model has the fields here as well | ||||
|     comment = TextField(default = '') | ||||
|     verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad | ||||
|  | ||||
|     # authors | ||||
|     # keywords | ||||
|     # ... are added through foreignkeys | ||||
|     # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db | ||||
|  | ||||
|  | ||||
|     ## Helpers specific to a single article | ||||
|     def __str__(self) -> str: | ||||
|         if self.title != '' and self.source_name != '': | ||||
|             desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}" | ||||
|         else: | ||||
|             desc = f"{self.article_url}" | ||||
|         return f"ART [{desc}]" | ||||
|  | ||||
|     @property | ||||
|     def slack_info(self): | ||||
|         status = [":x: No better version available", ":gear: Verification pending", ":white_check_mark: Verified by human"][self.verified + 1] | ||||
|         content = "\n>" + "\n>".join(self.summary.split("\n")) | ||||
|         file_status, msg = self.file_status() | ||||
|         if not file_status: | ||||
|             return [msg] | ||||
|          | ||||
|         # everything alright: generate real content | ||||
|         # first the base file | ||||
|         if self.file_name[-4:] == ".pdf": | ||||
|             answer = [{ # main reply with the base pdf | ||||
|                 "reply_text" : f"*{self.title}*\n{status}\n{content}", | ||||
|                 "file_path" : self.save_path + self.file_name  | ||||
|             }] | ||||
|         else: # don't upload if the file is too big! | ||||
|             location = f"Not uploaded to slack, but the file will be on the NAS:\n`{self.fname_nas}`" | ||||
|             answer = [{ # main reply with the base pdf | ||||
|                 "reply_text" : f"*{self.title}*\n{status}\n{content}\n{location}", | ||||
|                 "file_path" : None  | ||||
|             }] | ||||
|  | ||||
|         # then the related files | ||||
|         rel_text = "" | ||||
|         for r in self.related: | ||||
|             fname = r.related_file_name | ||||
|             lentry = "\n• `{}` ".format(self.fname_nas(fname)) | ||||
|             if fname[-4:] == ".pdf": # this is a manageable file, directly upload | ||||
|                 f_ret = self.save_path + fname | ||||
|                 answer.append({"reply_text":"", "file_path" : f_ret}) | ||||
|             else: # not pdf <=> too large. Don't upload but mention its existence | ||||
|                 lentry += "(not uploaded to slack, but the file will be on the NAS)" | ||||
|                  | ||||
|             rel_text += lentry | ||||
|  | ||||
|         if rel_text: | ||||
|             answer[0]["reply_text"] += "\nRelated files:\n" + rel_text | ||||
|          | ||||
|         return answer | ||||
|  | ||||
|     @property | ||||
|     def mail_info(self): | ||||
|         base = [{"reply_text": f"[{self.article_url}]({self.article_url})\n", "file_path":None}] + self.slack_info | ||||
|         return [{"reply_text": markdown.markdown(m["reply_text"]), "file_path": m["file_path"]} for m in base] | ||||
|  | ||||
|  | ||||
|     def set_authors(self, authors): | ||||
|         for a in authors: | ||||
|             ArticleAuthor.create( | ||||
|                 article = self, | ||||
|                 author = a | ||||
|                 ) | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
|             ) | ||||
|  | ||||
|     def file_status(self): | ||||
|         if not self.file_name: | ||||
|             logger.error(f"Article {self} has no filename!") | ||||
|             return False, {"reply_text": "Download failed, no file was saved.", "file_path": None} | ||||
|          | ||||
|         file_path_abs = self.save_path + self.file_name | ||||
|         if not os.path.exists(file_path_abs): | ||||
|             logger.error(f"Article {self} has a filename, but the file does not exist at that location!") | ||||
|             return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None} | ||||
|  | ||||
|         return True, {} | ||||
|  | ||||
|  | ||||
| class ArticleAuthor(DownloadBaseModel): | ||||
|     article = ForeignKeyField(ArticleDownload, backref='authors') | ||||
|     author = CharField() | ||||
|  | ||||
|  | ||||
| class ArticleRelated(DownloadBaseModel): | ||||
|     # Related files, such as the full text of a paper, audio files, etc. | ||||
|     article = ForeignKeyField(ArticleDownload, backref='related') | ||||
|     related_file_name = TextField(default = '') | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| # class Thread(ChatBaseModel): | ||||
| #     """The threads that concern us are only created if the base massage contains a url""" | ||||
| #     thread_ts = FloatField(default = 0) | ||||
| #     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) | ||||
| #     # provides, ts, user, models | ||||
| #     # messages | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.thread_ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, we have a problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros*"0") | ||||
|  | ||||
| #     @property | ||||
| #     def initiator_message(self): | ||||
| #         try: | ||||
| #             return self.messages[0] # TODO check if this needs sorting | ||||
| #         except IndexError: | ||||
| #             logger.warning(f"Thread {self} is empty. How can that be?") | ||||
| #             return None | ||||
|  | ||||
| #     @property | ||||
| #     def message_count(self): | ||||
| #         # logger.warning("message_count was called") | ||||
| #         return self.messages.count() | ||||
|  | ||||
| #     @property | ||||
| #     def last_message(self): | ||||
| #         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation | ||||
| #         return messages[-1] | ||||
|  | ||||
| #     @property | ||||
| #     def is_fully_processed(self) -> bool: | ||||
| #         init_message = self.initiator_message | ||||
| #         if init_message is None: | ||||
| #             return False | ||||
|          | ||||
| #         if init_message.is_processed_override: | ||||
| #             return True | ||||
| #         # this override is set for instance, when no url was sent at all. Then set this thread to be ignored | ||||
|          | ||||
| #         reactions = init_message.reaction | ||||
| #         if not reactions: | ||||
| #             return False | ||||
| #         else: | ||||
| #             r = reactions[0].type # can and should only have one reaction | ||||
| #             return r == "white_check_mark" \ | ||||
| #                 or r == "x" | ||||
|  | ||||
|  | ||||
|      | ||||
| # class Message(ChatBaseModel): | ||||
| #     ts = FloatField(unique=True) #for sorting | ||||
| #     channel_id = CharField(default='') | ||||
| #     user = ForeignKeyField(User, backref="messages") | ||||
| #     text = TextField(default='') | ||||
| #     thread = ForeignKeyField(Thread, backref="messages", default=None) | ||||
| #     file_type = CharField(default='') | ||||
| #     perma_link = CharField(default='') | ||||
| #     is_processed_override = BooleanField(default=False) | ||||
| #     # reaction | ||||
|  | ||||
| #     def __str__(self) -> str: | ||||
| #         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, we have a problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros * "0") | ||||
|  | ||||
|  | ||||
| #     @property | ||||
| #     def urls(self): | ||||
| #         pattern = r"<(.*?)>" | ||||
| #         matches = re.findall(pattern, self.text) | ||||
| #         matches = [m for m in matches if "." in m] | ||||
|          | ||||
| #         new_matches = [] | ||||
| #         for m in matches: | ||||
| #             if "." in m:  # must contain a tld, right? | ||||
| #                 # further complication: slack automatically abreviates urls in the format:  | ||||
| #                 # <url|link preview>. Lucky for us, "|" is a character derecommended in urls, meaning we can "safely" split for it and retain the first half | ||||
| #                 if "|" in m: | ||||
| #                     keep = m.split("|")[0] | ||||
| #                 else: | ||||
| #                     keep = m | ||||
| #                 new_matches.append(keep) | ||||
| #         return new_matches | ||||
|      | ||||
| #     @property | ||||
| #     def is_by_human(self): | ||||
| #         return self.user.user_id != slack_config["bot_id"] | ||||
|  | ||||
|      | ||||
| #     @property | ||||
| #     def has_single_url(self): | ||||
| #         return len(self.urls) == 1 | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_db(download_db_object): | ||||
|     download_db.initialize(download_db_object) | ||||
|     with download_db: # create tables (does nothing if they exist already) | ||||
|         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||
|  | ||||
|  | ||||
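set_db works because every model is declared against a peewee DatabaseProxy, which defers the choice of backend to runtime; the pattern in isolation (model fields hypothetical):

```python
from peewee import DatabaseProxy, Model, CharField, SqliteDatabase

db_proxy = DatabaseProxy()  # placeholder, no backend chosen yet

class Article(Model):
    title = CharField(default="")
    class Meta:
        database = db_proxy  # models bind to the proxy, not to a concrete database

# at startup, configuration code picks the real backend:
db_proxy.initialize(SqliteDatabase(":memory:"))
with db_proxy:
    db_proxy.create_tables([Article])  # no-op if the table already exists
```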
| @@ -5,7 +5,7 @@ from pathlib import Path | ||||
| import logging | ||||
| logger = logging.getLogger(__name__) | ||||
| import configuration | ||||
| config = configuration.parsed["DOWNLOADS"] | ||||
| config = configuration.main_config["DOWNLOADS"] | ||||
| 
 | ||||
| shrink_sizes = [] | ||||
| 
 | ||||
| @@ -8,7 +8,7 @@ from selenium import webdriver | ||||
| import configuration | ||||
| import json | ||||
| 
 | ||||
| config = configuration.parsed["DOWNLOADS"] | ||||
| config = configuration.main_config["DOWNLOADS"] | ||||
| blacklisted = json.loads(config["blacklisted_href_domains"]) | ||||
| 
 | ||||
| 
 | ||||
| @@ -25,10 +25,10 @@ class PDFDownloader: | ||||
|         options.profile = config["browser_profile_path"] | ||||
|         # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work | ||||
| 
 | ||||
|         if os.getenv("HEADLESS", "false") == "true": | ||||
|             options.add_argument('--headless') | ||||
|         if os.getenv("DEBUG", "false") == "true": | ||||
|             self.logger.warning("Opening browser GUI because of 'DEBUG=true'") | ||||
|         else: | ||||
|             self.logger.warning("Opening browser GUI because of 'HEADLESS=false'") | ||||
|             options.add_argument('--headless') | ||||
| 
 | ||||
|         options.set_preference('print.save_as_pdf.links.enabled', True) | ||||
|         # Just save if the filetype is pdf already | ||||
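The headless toggle above maps onto selenium's Firefox options; a minimal sketch of driving the separate geckodriver container, where the executor URL is an assumption based on the compose file:

```python
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

options = Options()
options.add_argument("--headless")  # omit this to watch the browser, e.g. when DEBUG=true
driver = webdriver.Remote(
    command_executor="http://geckodriver:4444",  # service name from docker-compose
    options=options,
)
```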
| @@ -92,7 +92,7 @@ class PDFDownloader: | ||||
| 
 | ||||
|         # in the mean time, get a page title if required | ||||
|         if article_object.is_title_bad: | ||||
|             article_object.title = self.driver.title.replace(".pdf", "") | ||||
|             article_object.title = self.driver.title.replace(".pdf", "") # some titles end with .pdf | ||||
|             # will be propagated to the saved file (dst) as well | ||||
| 
 | ||||
|         fname = article_object.fname_template | ||||
| @@ -112,7 +112,6 @@ class PDFDownloader: | ||||
| 
 | ||||
|         if success: | ||||
|             article_object.file_name = fname | ||||
|             article_object.set_references(self.get_references()) | ||||
|         else: | ||||
|             article_object.file_name = "" | ||||
|          | ||||
| @@ -150,18 +149,6 @@ class PDFDownloader: | ||||
|             return False | ||||
|          | ||||
| 
 | ||||
|     def get_references(self): | ||||
|         try: | ||||
|             hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")] | ||||
|         except: | ||||
|             hrefs = [] | ||||
|         # len_old = len(hrefs) | ||||
|         hrefs = [h for h in hrefs \ | ||||
|             if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0) | ||||
|             ] # filter a tiny bit at least | ||||
|         # self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})") | ||||
|         return hrefs | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @@ -54,9 +54,4 @@ def get_description(article_object): | ||||
|     except AttributeError: | ||||
|         pass # list would have been empty anyway | ||||
|          | ||||
|     try: | ||||
|         article_object.set_keywords(news_article.keywords) | ||||
|     except AttributeError: | ||||
|         pass # list would have been empty anyway | ||||
|      | ||||
|     return article_object | ||||
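get_description presumably operates on a parsed newspaper article; a sketch of the library's usual flow, for context (the URL is a placeholder, and keywords would additionally require nlp()):

```python
from newspaper import Article  # newspaper3k

news_article = Article("https://example.com/some-story")
news_article.download()
news_article.parse()   # fills .title, .authors, .meta_description, ...
# news_article.nlp()   # would be needed for .keywords / .summary
print(news_article.title, news_article.authors)
```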