Compare commits
	
		
			2 Commits
		
	
	
		
			main
			...
			77c96be844
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 77c96be844 | |||
| e1a8dabd2c | 
							
								
								
									
										30
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,5 +1,33 @@
 | 
				
			|||||||
.dev/
 | 
					.dev/
 | 
				
			||||||
 | 
					.vscode/
 | 
				
			||||||
*.pyc
 | 
					*.pyc
 | 
				
			||||||
*.log
 | 
					*.log
 | 
				
			||||||
__pycache__/
 | 
					__pycache__/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## svelte:
 | 
				
			||||||
 | 
					# Logs
 | 
				
			||||||
 | 
					logs
 | 
				
			||||||
 | 
					*.log
 | 
				
			||||||
 | 
					npm-debug.log*
 | 
				
			||||||
 | 
					yarn-debug.log*
 | 
				
			||||||
 | 
					yarn-error.log*
 | 
				
			||||||
 | 
					pnpm-debug.log*
 | 
				
			||||||
 | 
					lerna-debug.log*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					node_modules
 | 
				
			||||||
 | 
					dist
 | 
				
			||||||
 | 
					dist-ssr
 | 
				
			||||||
 | 
					*.local
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Editor directories and files
 | 
				
			||||||
 | 
					.vscode/*
 | 
				
			||||||
 | 
					!.vscode/extensions.json
 | 
				
			||||||
 | 
					.idea
 | 
				
			||||||
 | 
					.DS_Store
 | 
				
			||||||
 | 
					*.suo
 | 
				
			||||||
 | 
					*.ntvs*
 | 
				
			||||||
 | 
					*.njsproj
 | 
				
			||||||
 | 
					*.sln
 | 
				
			||||||
 | 
					*.sw?
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,25 +1,8 @@
 | 
				
			|||||||
# Usage:
 | 
					 | 
				
			||||||
# docker compose --env-file env/<mode> run <args> news_fetch && docker-compose --env-file env/production down
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
version: "3.9"
 | 
					version: "3.9"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
services:
 | 
					services:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  geckodriver:
 | 
					  vpn: # Creates a connection behind the ETH Firewall to access NAS and Postgres
 | 
				
			||||||
    image: selenium/standalone-firefox:103.0
 | 
					 | 
				
			||||||
    volumes:
 | 
					 | 
				
			||||||
      - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock}
 | 
					 | 
				
			||||||
      - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority
 | 
					 | 
				
			||||||
    environment:
 | 
					 | 
				
			||||||
      - DISPLAY=$DISPLAY
 | 
					 | 
				
			||||||
      - START_VNC=false
 | 
					 | 
				
			||||||
      - START_XVFB=false
 | 
					 | 
				
			||||||
    user: 1001:1001
 | 
					 | 
				
			||||||
    expose: # exposed to other docker-compose services only
 | 
					 | 
				
			||||||
    - "4444"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  vpn:
 | 
					 | 
				
			||||||
    image: wazum/openconnect-proxy:latest
 | 
					    image: wazum/openconnect-proxy:latest
 | 
				
			||||||
    env_file:
 | 
					    env_file:
 | 
				
			||||||
      - ${CONTAINER_DATA}/config/vpn.config
 | 
					      - ${CONTAINER_DATA}/config/vpn.config
 | 
				
			||||||
@@ -28,13 +11,14 @@ services:
 | 
				
			|||||||
    volumes:
 | 
					    volumes:
 | 
				
			||||||
      - /dev/net/tun:/dev/net/tun
 | 
					      - /dev/net/tun:/dev/net/tun
 | 
				
			||||||
    # alternative to cap_add & volumes: specify privileged: true
 | 
					    # alternative to cap_add & volumes: specify privileged: true
 | 
				
			||||||
 | 
					    expose: ["5432"] # exposed here because db_passthrough uses this network. See below for more details
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  nas_sync:
 | 
					  nas_sync: # Syncs locally downloaded files with the NAS-share on nas22.ethz.ch/...
 | 
				
			||||||
    depends_on:
 | 
					    depends_on:
 | 
				
			||||||
      - vpn # used to establish a connection to the SMB server
 | 
					      - vpn
 | 
				
			||||||
    network_mode: "service:vpn"
 | 
					    network_mode: "service:vpn" # used to establish a connection to the SMB server from inside ETH network
 | 
				
			||||||
    build: nas_sync
 | 
					    build: nas_sync # local folder to build
 | 
				
			||||||
    image: nas_sync:latest
 | 
					    image: nas_sync:latest
 | 
				
			||||||
    cap_add: # capabilities needed for mounting the SMB share
 | 
					    cap_add: # capabilities needed for mounting the SMB share
 | 
				
			||||||
      - SYS_ADMIN
 | 
					      - SYS_ADMIN
 | 
				
			||||||
@@ -49,27 +33,56 @@ services:
 | 
				
			|||||||
      - /sync/nas_sync.config
 | 
					      - /sync/nas_sync.config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  news_fetch:
 | 
					  geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers)
 | 
				
			||||||
 | 
					    image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated)
 | 
				
			||||||
 | 
					    environment:
 | 
				
			||||||
 | 
					      - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (e.g. for websites that crash)
 | 
				
			||||||
 | 
					      - START_XVFB=${HEADFULL-false}
 | 
				
			||||||
 | 
					    expose: ["4444"] # exposed to other docker-compose services only
 | 
				
			||||||
 | 
					    ports:
 | 
				
			||||||
 | 
					      - 7900:7900 # port for webvnc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  db_passthrough: # Allows a container on the local network to connect to a service (here postgres) through the vpn
 | 
				
			||||||
 | 
					    network_mode: "service:vpn"
 | 
				
			||||||
 | 
					    image: alpine/socat:latest
 | 
				
			||||||
 | 
					    command: ["tcp-listen:5432,reuseaddr,fork", "tcp-connect:id-hdb-psgr-cp48.ethz.ch:5432"]
 | 
				
			||||||
 | 
					    # expose: ["5432"] We would want this passthrough to expose its ports to the other containers
 | 
				
			||||||
 | 
					    # BUT since it uses the same network as the vpn-service, it can't expose ports on its own. 5432 is therefore exposed under services.vpn.expose
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  news_fetch: # Orchestration of the automatic download. It generates PDFs (via the geckodriver container), fetches descriptions, triggers a snapshot (on archive.org) and writes to the database
 | 
				
			||||||
    build: news_fetch
 | 
					    build: news_fetch
 | 
				
			||||||
    image: news_fetch:latest
 | 
					    image: news_fetch:latest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    depends_on: # when using docker compose run news_fetch, the dependencies are started as well
 | 
					    depends_on: # when using docker compose run news_fetch, the dependencies are started as well
 | 
				
			||||||
      - nas_sync
 | 
					      - nas_sync
 | 
				
			||||||
      - geckodriver
 | 
					      - geckodriver
 | 
				
			||||||
 | 
					      - db_passthrough
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    volumes:
 | 
					    volumes:
 | 
				
			||||||
      - ${CONTAINER_DATA}:/app/containerdata # always set
 | 
					      - ${CONTAINER_DATA}:/app/containerdata # always set
 | 
				
			||||||
      - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null
 | 
					      - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null
 | 
				
			||||||
      - ${XSOCK-/dev/null}:${XSOCK-/tmp/sock} # x11 socket, needed for gui
 | 
					 | 
				
			||||||
      # - ${XAUTHORITY-/dev/null}:/home/auto_news/.Xauthority # xauth needed for authenticating to x11
 | 
					 | 
				
			||||||
    environment:
 | 
					    environment:
 | 
				
			||||||
      - DISPLAY=$DISPLAY # needed to let x11 apps know where to connect to
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      - DEBUG=${DEBUG}
 | 
					      - DEBUG=${DEBUG}
 | 
				
			||||||
      - CHECK=${CHECK}
 | 
					      - UNAME=${UNAME}
 | 
				
			||||||
      - UPLOAD=${UPLOAD}
 | 
					    entrypoint: ${ENTRYPOINT:-python runner.py} # by default launch workers as defined in the Dockerfile
 | 
				
			||||||
      - HEADLESS=${HEADLESS}
 | 
					    # stdin_open: ${INTERACTIVE:-false} # docker run -i
 | 
				
			||||||
      - REDUCEDFETCH=${REDUCEDFETCH}
 | 
					    # tty: ${INTERACTIVE:-false}        # docker run -t
 | 
				
			||||||
    entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile
 | 
					
 | 
				
			||||||
    stdin_open: ${INTERACTIVE:-false} # docker run -i
 | 
					
 | 
				
			||||||
    tty: ${INTERACTIVE:-false}        # docker run -t
 | 
					  news_check: # Creates a small webapp on http://localhost:8080 to check previously generated pdfs (some of which are unusable and must be marked as such)
 | 
				
			||||||
 | 
					    build: news_check
 | 
				
			||||||
 | 
					    image: news_check:latest
 | 
				
			||||||
 | 
					    # user: 1001:1001 # since the app writes files to the local filesystem, it must be run as the current user
 | 
				
			||||||
 | 
					    depends_on:
 | 
				
			||||||
 | 
					      - db_passthrough
 | 
				
			||||||
 | 
					    volumes:
 | 
				
			||||||
 | 
					      - ${CONTAINER_DATA}:/app/containerdata # always set
 | 
				
			||||||
 | 
					      - ${CODE:-/dev/null}:/code # not set in prod, defaults to /dev/null
 | 
				
			||||||
 | 
					    environment:
 | 
				
			||||||
 | 
					      - UNAME=${UNAME}
 | 
				
			||||||
 | 
					    ports:
 | 
				
			||||||
 | 
					      - "8080:80" # 80 inside container
 | 
				
			||||||
 | 
					    entrypoint: ${ENTRYPOINT:-python app.py} # by default launch workers as defined in the Dockerfile
 | 
				
			||||||
 | 
					    tty: true
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										2
									
								
								env/debug
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								env/debug
									
									
									
									
										vendored
									
									
								
							@@ -3,8 +3,6 @@
 | 
				
			|||||||
CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving
 | 
					CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CODE=./
 | 
					CODE=./
 | 
				
			||||||
XAUTHORTIY=$XAUTHORTIY
 | 
					 | 
				
			||||||
XSOCK=/tmp/.X11-unix
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
DEBUG=true
 | 
					DEBUG=true
 | 
				
			||||||
CHECK=false
 | 
					CHECK=false
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										44
									
								
								launch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								launch
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,44 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					set -e
 | 
				
			||||||
 | 
					set -o ignoreeof
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo "Bash script launching COSS_ARCHIVING..."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# CHANGE ME!
 | 
				
			||||||
 | 
					export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving
 | 
				
			||||||
 | 
					export UNAME=remy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if [[ $1 == "debug" ]]
 | 
				
			||||||
 | 
					then
 | 
				
			||||||
 | 
					    export DEBUG=true
 | 
				
			||||||
 | 
					    export HEADFULL=true
 | 
				
			||||||
 | 
					    export CODE=./
 | 
				
			||||||
 | 
					    export ENTRYPOINT=/bin/bash
 | 
				
			||||||
 | 
					    # since --service-ports is not enough here, also execute up, which will start all services in the background
 | 
				
			||||||
 | 
					    docker compose up -d
 | 
				
			||||||
 | 
					elif [[ $1 == "production" ]]
 | 
				
			||||||
 | 
					then
 | 
				
			||||||
 | 
					    export DEBUG=false
 | 
				
			||||||
 | 
					elif [[ $1 == "build" ]]
 | 
				
			||||||
 | 
					then
 | 
				
			||||||
 | 
					    export DEBUG=false
 | 
				
			||||||
 | 
					    docker compose build
 | 
				
			||||||
 | 
					    exit 0
 | 
				
			||||||
 | 
					elif [[ $1 == "down" ]]
 | 
				
			||||||
 | 
					then
 | 
				
			||||||
 | 
					    docker compose stop
 | 
				
			||||||
 | 
					    exit 0
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					    echo "Please specify the execution mode (debug/production/build) as the first argument"
 | 
				
			||||||
 | 
					    exit 1
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					shift # consumes the variable set in $1 so that $@ only contains the remaining arguments
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					docker compose run -it --service-ports "$@"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo "Docker run finished, shutting down containers..."
 | 
				
			||||||
 | 
					docker compose stop
 | 
				
			||||||
 | 
					echo "Bye!"
 | 
				
			||||||
@@ -15,7 +15,7 @@ runner.configuration.models.set_db(
 | 
				
			|||||||
    runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"),  # chat_db (not needed here)
 | 
					    runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"),  # chat_db (not needed here)
 | 
				
			||||||
    runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
 | 
					    runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "../.dev/"
 | 
					runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def fetch():
 | 
					def fetch():
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										12
									
								
								misc/sample_config/nas_sync.config
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								misc/sample_config/nas_sync.config
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,12 @@
 | 
				
			|||||||
 | 
					settings {
 | 
				
			||||||
 | 
					   logfile    = "/tmp/lsyncd.log",
 | 
				
			||||||
 | 
					   statusFile = "/tmp/lsyncd.status",
 | 
				
			||||||
 | 
					   nodaemon   = true,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sync {
 | 
				
			||||||
 | 
					   default.rsync,
 | 
				
			||||||
 | 
					   source = "/sync/local_files",
 | 
				
			||||||
 | 
					   target = "/sync/remote_files",
 | 
				
			||||||
 | 
					   init = false,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										25
									
								
								news_check/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								news_check/Dockerfile
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,25 @@
 | 
				
			|||||||
 | 
					FROM node:18.8 as build-deps
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					WORKDIR /app/client
 | 
				
			||||||
 | 
					COPY client/package.json ./
 | 
				
			||||||
 | 
					COPY client/package-lock.json ./
 | 
				
			||||||
 | 
					COPY client/rollup.config.js ./
 | 
				
			||||||
 | 
					COPY client/src ./src/
 | 
				
			||||||
 | 
					RUN npm install
 | 
				
			||||||
 | 
					RUN npm run build
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					FROM python:latest
 | 
				
			||||||
 | 
					ENV TZ Europe/Zurich
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					WORKDIR /app/news_check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					COPY requirements.txt requirements.txt
 | 
				
			||||||
 | 
					RUN python3 -m pip install -r requirements.txt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					COPY client/public/index.html client/public/index.html
 | 
				
			||||||
 | 
					COPY --from=build-deps /app/client/public client/public/
 | 
				
			||||||
 | 
					COPY server server/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					WORKDIR /app/news_check/server
 | 
				
			||||||
 | 
					# CMD python app.py
 | 
				
			||||||
							
								
								
									
										4
									
								
								news_check/client/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								news_check/client/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
				
			|||||||
 | 
					/node_modules/
 | 
				
			||||||
 | 
					/public/build/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.DS_Store
 | 
				
			||||||
							
								
								
									
										107
									
								
								news_check/client/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								news_check/client/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,107 @@
 | 
				
			|||||||
 | 
					# This repo is no longer maintained. Consider using `npm init vite` and selecting the `svelte` option or — if you want a full-fledged app framework and don't mind using pre-1.0 software — use [SvelteKit](https://kit.svelte.dev), the official application framework for Svelte.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# svelte app
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This is a project template for [Svelte](https://svelte.dev) apps. It lives at https://github.com/sveltejs/template.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To create a new project based on this template using [degit](https://github.com/Rich-Harris/degit):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npx degit sveltejs/template svelte-app
 | 
				
			||||||
 | 
					cd svelte-app
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					*Note that you will need to have [Node.js](https://nodejs.org) installed.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Get started
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Install the dependencies...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					cd svelte-app
 | 
				
			||||||
 | 
					npm install
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					...then start [Rollup](https://rollupjs.org):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npm run dev
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Navigate to [localhost:8080](http://localhost:8080). You should see your app running. Edit a component file in `src`, save it, and reload the page to see your changes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default, the server will only respond to requests from localhost. To allow connections from other computers, edit the `sirv` commands in package.json to include the option `--host 0.0.0.0`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you're using [Visual Studio Code](https://code.visualstudio.com/) we recommend installing the official extension [Svelte for VS Code](https://marketplace.visualstudio.com/items?itemName=svelte.svelte-vscode). If you are using other editors you may need to install a plugin in order to get syntax highlighting and intellisense.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Building and running in production mode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To create an optimised version of the app:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npm run build
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can run the newly built app with `npm run start`. This uses [sirv](https://github.com/lukeed/sirv), which is included in your package.json's `dependencies` so that the app will work when you deploy to platforms like [Heroku](https://heroku.com).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Single-page app mode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default, sirv will only respond to requests that match files in `public`. This is to maximise compatibility with static fileservers, allowing you to deploy your app anywhere.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you're building a single-page app (SPA) with multiple routes, sirv needs to be able to respond to requests for *any* path. You can make it so by editing the `"start"` command in package.json:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```js
 | 
				
			||||||
 | 
					"start": "sirv public --single"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Using TypeScript
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This template comes with a script to set up a TypeScript development environment, you can run it immediately after cloning the template with:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					node scripts/setupTypeScript.js
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Or remove the script via:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					rm scripts/setupTypeScript.js
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you want to use `baseUrl` or `path` aliases within your `tsconfig`, you need to set up `@rollup/plugin-alias` to tell Rollup to resolve the aliases. For more info, see [this StackOverflow question](https://stackoverflow.com/questions/63427935/setup-tsconfig-path-in-svelte).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Deploying to the web
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### With [Vercel](https://vercel.com)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Install `vercel` if you haven't already:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npm install -g vercel
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Then, from within your project folder:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					cd public
 | 
				
			||||||
 | 
					vercel deploy --name my-project
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### With [surge](https://surge.sh/)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Install `surge` if you haven't already:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npm install -g surge
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Then, from within your project folder:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					npm run build
 | 
				
			||||||
 | 
					surge public my-project.surge.sh
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
							
								
								
									
										1955
									
								
								news_check/client/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										1955
									
								
								news_check/client/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										23
									
								
								news_check/client/package.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								news_check/client/package.json
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,23 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					  "name": "svelte-app",
 | 
				
			||||||
 | 
					  "version": "1.0.0",
 | 
				
			||||||
 | 
					  "private": true,
 | 
				
			||||||
 | 
					  "scripts": {
 | 
				
			||||||
 | 
					    "build": "rollup -c",
 | 
				
			||||||
 | 
					    "dev": "rollup -c -w",
 | 
				
			||||||
 | 
					    "start": "sirv public --no-clear"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "devDependencies": {
 | 
				
			||||||
 | 
					    "@rollup/plugin-commonjs": "^17.0.0",
 | 
				
			||||||
 | 
					    "@rollup/plugin-node-resolve": "^11.0.0",
 | 
				
			||||||
 | 
					    "rollup": "^2.3.4",
 | 
				
			||||||
 | 
					    "rollup-plugin-css-only": "^3.1.0",
 | 
				
			||||||
 | 
					    "rollup-plugin-livereload": "^2.0.0",
 | 
				
			||||||
 | 
					    "rollup-plugin-svelte": "^7.0.0",
 | 
				
			||||||
 | 
					    "rollup-plugin-terser": "^7.0.0",
 | 
				
			||||||
 | 
					    "svelte": "^3.0.0"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "dependencies": {
 | 
				
			||||||
 | 
					    "sirv-cli": "^2.0.0"
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										
											BIN
										
									
								
								news_check/client/public/favicon.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								news_check/client/public/favicon.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 3.1 KiB  | 
							
								
								
									
										63
									
								
								news_check/client/public/global.css
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								news_check/client/public/global.css
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,63 @@
 | 
				
			|||||||
 | 
					html, body {
 | 
				
			||||||
 | 
						position: relative;
 | 
				
			||||||
 | 
						width: 100%;
 | 
				
			||||||
 | 
						height: 100%;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					body {
 | 
				
			||||||
 | 
						color: #333;
 | 
				
			||||||
 | 
						margin: 0;
 | 
				
			||||||
 | 
						padding: 8px;
 | 
				
			||||||
 | 
						box-sizing: border-box;
 | 
				
			||||||
 | 
						font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					a {
 | 
				
			||||||
 | 
						color: rgb(0,100,200);
 | 
				
			||||||
 | 
						text-decoration: none;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					a:hover {
 | 
				
			||||||
 | 
						text-decoration: underline;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					a:visited {
 | 
				
			||||||
 | 
						color: rgb(0,80,160);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					label {
 | 
				
			||||||
 | 
						display: block;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					input, button, select, textarea {
 | 
				
			||||||
 | 
						font-family: inherit;
 | 
				
			||||||
 | 
						font-size: inherit;
 | 
				
			||||||
 | 
						-webkit-padding: 0.4em 0;
 | 
				
			||||||
 | 
						padding: 0.4em;
 | 
				
			||||||
 | 
						margin: 0 0 0.5em 0;
 | 
				
			||||||
 | 
						box-sizing: border-box;
 | 
				
			||||||
 | 
						border: 1px solid #ccc;
 | 
				
			||||||
 | 
						border-radius: 2px;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					input:disabled {
 | 
				
			||||||
 | 
						color: #ccc;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					button {
 | 
				
			||||||
 | 
						color: #333;
 | 
				
			||||||
 | 
						background-color: #f4f4f4;
 | 
				
			||||||
 | 
						outline: none;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					button:disabled {
 | 
				
			||||||
 | 
						color: #999;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					button:not(:disabled):active {
 | 
				
			||||||
 | 
						background-color: #ddd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					button:focus {
 | 
				
			||||||
 | 
						border-color: #666;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										25
									
								
								news_check/client/public/index.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								news_check/client/public/index.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,25 @@
 | 
				
			|||||||
 | 
					<!DOCTYPE html>
					<!-- data-theme on the root element selects the daisyUI theme.
					     It used to be set through a second <html> element nested in <head>, which is invalid markup. -->
					<html lang="en" data-theme="light">
					<head>
						<meta charset='utf-8'>
						<meta name='viewport' content='width=device-width,initial-scale=1'>

						<title>NEWS CHECK</title>

						<link rel='icon' type='image/png' href='https://ethz.ch/etc/designs/ethz/img/icons/ETH-APP-Icons-Theme-white/192-xxxhpdi.png'>
						<link rel='stylesheet' href='/build/bundle.css'>

						<!-- Compiled Svelte application (written by rollup to public/build) -->
						<script defer src='/build/bundle.js'></script>

						<!-- daisyUI component styles + Tailwind, both loaded from CDNs -->
						<link href="https://cdn.jsdelivr.net/npm/daisyui@2.24.0/dist/full.css" rel="stylesheet" type="text/css" />
						<script src="https://cdn.tailwindcss.com"></script>

						<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.0.943/pdf.min.js"></script>

					</head>


					<!-- Left empty on purpose: src/main.js mounts the Svelte app onto document.body -->
					<body>
					</body>
					</html>
 | 
				
			||||||
							
								
								
									
										
											BIN
										
									
								
								news_check/client/public/test.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								news_check/client/public/test.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										76
									
								
								news_check/client/rollup.config.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								news_check/client/rollup.config.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,76 @@
 | 
				
			|||||||
 | 
					import svelte from 'rollup-plugin-svelte';
 | 
				
			||||||
 | 
					import commonjs from '@rollup/plugin-commonjs';
 | 
				
			||||||
 | 
					import resolve from '@rollup/plugin-node-resolve';
 | 
				
			||||||
 | 
					import livereload from 'rollup-plugin-livereload';
 | 
				
			||||||
 | 
					import { terser } from 'rollup-plugin-terser';
 | 
				
			||||||
 | 
					import css from 'rollup-plugin-css-only';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const production = !process.env.ROLLUP_WATCH;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Rollup plugin factory used in watch mode: after the first bundle has been
					 * written it launches `npm run start -- --dev` exactly once and makes sure
					 * that child process is killed when rollup receives SIGTERM or exits.
					 */
					function serve() {
						// Handle of the spawned dev server; stays undefined until the first writeBundle.
						let child;

						// Kills the dev server if one has been started.
						const shutdown = () => {
							if (child) child.kill(0);
						};

						const plugin = {
							writeBundle() {
								// Later rebuilds must not spawn additional servers.
								if (child) return;

								const { spawn } = require('child_process');
								child = spawn('npm', ['run', 'start', '--', '--dev'], {
									stdio: ['ignore', 'inherit', 'inherit'],
									shell: true
								});

								process.on('SIGTERM', shutdown);
								process.on('exit', shutdown);
							}
						};

						return plugin;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Rollup build definition: bundles src/main.js and the Svelte components it
					// imports into a single IIFE at public/build/bundle.js (plus bundle.css).
					export default {
						input: 'src/main.js',
						output: {
							sourcemap: true,
							format: 'iife',
							name: 'app',
							file: 'public/build/bundle.js'
						},
						plugins: [
							svelte({
								compilerOptions: {
									// enable run-time checks when not in production
									dev: !production
								}
							}),
							// we'll extract any component CSS out into
							// a separate file - better for performance
							css({ output: 'bundle.css' }),

							// If you have external dependencies installed from
							// npm, you'll most likely need these plugins. In
							// some cases you'll need additional configuration -
							// consult the documentation for details:
							// https://github.com/rollup/plugins/tree/master/packages/commonjs
							resolve({
								browser: true,
								dedupe: ['svelte']
							}),
							commonjs(),

							// In dev mode, call `npm run start` once
							// the bundle has been generated
							// (when `production` is true this entry evaluates to `false`)
							!production && serve(),

							// Watch the `public` directory and refresh the
							// browser on changes when not in production
							!production && livereload('public'),

							// If we're building for production (npm run build
							// instead of npm run dev), minify
							production && terser()
						],
						watch: {
							// keep earlier output visible in the terminal between rebuilds
							clearScreen: false
						}
					};
 | 
				
			||||||
							
								
								
									
										121
									
								
								news_check/client/scripts/setupTypeScript.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								news_check/client/scripts/setupTypeScript.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,121 @@
 | 
				
			|||||||
 | 
					// @ts-check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** This script modifies the project to support TS code in .svelte files like:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  <script lang="ts">
 | 
				
			||||||
 | 
					  	export let name: string;
 | 
				
			||||||
 | 
					  </script>
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  As well as validating the code for CI.
 | 
				
			||||||
 | 
					  */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**  To work on this script:
 | 
				
			||||||
 | 
					  rm -rf test-template template && git clone sveltejs/template test-template && node scripts/setupTypeScript.js test-template
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const fs = require("fs")
 | 
				
			||||||
 | 
					const path = require("path")
 | 
				
			||||||
 | 
					const { argv } = require("process")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const projectRoot = argv[2] || path.join(__dirname, "..")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add deps to pkg.json
 | 
				
			||||||
 | 
					const packageJSON = JSON.parse(fs.readFileSync(path.join(projectRoot, "package.json"), "utf8"))
 | 
				
			||||||
 | 
					packageJSON.devDependencies = Object.assign(packageJSON.devDependencies, {
 | 
				
			||||||
 | 
					  "svelte-check": "^2.0.0",
 | 
				
			||||||
 | 
					  "svelte-preprocess": "^4.0.0",
 | 
				
			||||||
 | 
					  "@rollup/plugin-typescript": "^8.0.0",
 | 
				
			||||||
 | 
					  "typescript": "^4.0.0",
 | 
				
			||||||
 | 
					  "tslib": "^2.0.0",
 | 
				
			||||||
 | 
					  "@tsconfig/svelte": "^2.0.0"
 | 
				
			||||||
 | 
					})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add script for checking
 | 
				
			||||||
 | 
					packageJSON.scripts = Object.assign(packageJSON.scripts, {
 | 
				
			||||||
 | 
					  "check": "svelte-check --tsconfig ./tsconfig.json"
 | 
				
			||||||
 | 
					})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Write the package JSON
 | 
				
			||||||
 | 
					fs.writeFileSync(path.join(projectRoot, "package.json"), JSON.stringify(packageJSON, null, "  "))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// mv src/main.js to main.ts - note, we need to edit rollup.config.js for this too
 | 
				
			||||||
 | 
					const beforeMainJSPath = path.join(projectRoot, "src", "main.js")
 | 
				
			||||||
 | 
					const afterMainTSPath = path.join(projectRoot, "src", "main.ts")
 | 
				
			||||||
 | 
					fs.renameSync(beforeMainJSPath, afterMainTSPath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Switch the app.svelte file to use TS
 | 
				
			||||||
 | 
					const appSveltePath = path.join(projectRoot, "src", "App.svelte")
 | 
				
			||||||
 | 
					let appFile = fs.readFileSync(appSveltePath, "utf8")
 | 
				
			||||||
 | 
					appFile = appFile.replace("<script>", '<script lang="ts">')
 | 
				
			||||||
 | 
					appFile = appFile.replace("export let name;", 'export let name: string;')
 | 
				
			||||||
 | 
					fs.writeFileSync(appSveltePath, appFile)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Edit rollup config
 | 
				
			||||||
 | 
					const rollupConfigPath = path.join(projectRoot, "rollup.config.js")
 | 
				
			||||||
 | 
					let rollupConfig = fs.readFileSync(rollupConfigPath, "utf8")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Edit imports
 | 
				
			||||||
 | 
					rollupConfig = rollupConfig.replace(`'rollup-plugin-terser';`, `'rollup-plugin-terser';
 | 
				
			||||||
 | 
					import sveltePreprocess from 'svelte-preprocess';
 | 
				
			||||||
 | 
					import typescript from '@rollup/plugin-typescript';`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Replace name of entry point
 | 
				
			||||||
 | 
					rollupConfig = rollupConfig.replace(`'src/main.js'`, `'src/main.ts'`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add preprocessor
 | 
				
			||||||
 | 
					rollupConfig = rollupConfig.replace(
 | 
				
			||||||
 | 
					  'compilerOptions:',
 | 
				
			||||||
 | 
					  'preprocess: sveltePreprocess({ sourceMap: !production }),\n\t\t\tcompilerOptions:'
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add TypeScript
 | 
				
			||||||
 | 
					rollupConfig = rollupConfig.replace(
 | 
				
			||||||
 | 
					  'commonjs(),',
 | 
				
			||||||
 | 
					  'commonjs(),\n\t\ttypescript({\n\t\t\tsourceMap: !production,\n\t\t\tinlineSources: !production\n\t\t}),'
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
 | 
					fs.writeFileSync(rollupConfigPath, rollupConfig)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add TSConfig
 | 
				
			||||||
 | 
					const tsconfig = `{
 | 
				
			||||||
 | 
					  "extends": "@tsconfig/svelte/tsconfig.json",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  "include": ["src/**/*"],
 | 
				
			||||||
 | 
					  "exclude": ["node_modules/*", "__sapper__/*", "public/*"]
 | 
				
			||||||
 | 
					}`
 | 
				
			||||||
 | 
					const tsconfigPath =  path.join(projectRoot, "tsconfig.json")
 | 
				
			||||||
 | 
					fs.writeFileSync(tsconfigPath, tsconfig)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Add global.d.ts
 | 
				
			||||||
 | 
					const dtsPath =  path.join(projectRoot, "src", "global.d.ts")
 | 
				
			||||||
 | 
					fs.writeFileSync(dtsPath, `/// <reference types="svelte" />`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Delete this script, but not during testing
 | 
				
			||||||
 | 
					// Self-cleanup: only when no explicit project root was passed on the command
					// line (i.e. not when the script is being tested against a scratch checkout).
					if (!argv[2]) {
					  // Remove the script
					  fs.unlinkSync(path.join(__filename))

					  // Check for macOS's Finder metadata file, and if it's the only one left remove it.
					  // The real file name is ".DS_Store"; compare case-insensitively and delete the
					  // entry by the name the directory listing actually returned.
					  const remainingFiles = fs.readdirSync(path.join(__dirname))
					  if (remainingFiles.length === 1 && remainingFiles[0].toLowerCase() === '.ds_store') {
					    fs.unlinkSync(path.join(__dirname, remainingFiles[0]))
					  }

					  // Check if the scripts folder is empty
					  if (fs.readdirSync(path.join(__dirname)).length === 0) {
					    // Remove the scripts folder
					    fs.rmdirSync(path.join(__dirname))
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Adds the extension recommendation
 | 
				
			||||||
 | 
					fs.mkdirSync(path.join(projectRoot, ".vscode"), { recursive: true })
 | 
				
			||||||
 | 
					fs.writeFileSync(path.join(projectRoot, ".vscode", "extensions.json"), `{
 | 
				
			||||||
 | 
					  "recommendations": ["svelte.svelte-vscode"]
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					console.log("Converted to TypeScript.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (fs.existsSync(path.join(projectRoot, "node_modules"))) {
 | 
				
			||||||
 | 
					  console.log("\nYou will need to re-run your dependency manager to get started.")
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										17
									
								
								news_check/client/src/App.svelte
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								news_check/client/src/App.svelte
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,17 @@
 | 
				
			|||||||
 | 
					<!-- Root component: PDF viewer on the left (3/5 of the width), article status and operations stacked on the right (2/5). -->
					<script>
						import PDFView from './PDFView.svelte';
						import ArticleStatus from './ArticleStatus.svelte';
						import ArticleOperations from './ArticleOperations.svelte';
					</script>


					<div class="flex w-full h-full gap-5 p-5">
						<div class="w-3/5"><PDFView/></div>
						<div class="divider divider-horizontal"></div> 
						<div class="w-2/5">
							<!-- NOTE(review): the article id is hard-coded to 42 here - presumably a placeholder, confirm -->
							<ArticleStatus article_id={42}/>
							<div class="divider divider-vertical"></div> 
							<ArticleOperations/>
						</div>
					</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										28
									
								
								news_check/client/src/ArticleOperations.svelte
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								news_check/client/src/ArticleOperations.svelte
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,28 @@
 | 
				
			|||||||
 | 
					<!-- Static toast notification; the message text is fixed in the markup. -->
					<div class="toast">
						<div class="alert alert-info">
						    <div>
					            <span>New message arrived.</span>
					        </div>
					    </div>
					</div>

					<!-- 3-column grid of nine identical cells. NOTE(review): looks like placeholder content - confirm -->
					<div class="grid grid-cols-3 gap-4">
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>
					    <div class="highlight">01</div>

					</div>
					<style>
					    /* Light grey rounded box used for every grid cell above */
					    .highlight {
					        background-color: #f5f5f5;
					        border-radius: 5px;
					        padding: 10px;
					        margin: 10px;
					    }
					</style>
 | 
				
			||||||
							
								
								
									
										25
									
								
								news_check/client/src/ArticleStatus.svelte
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								news_check/client/src/ArticleStatus.svelte
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,25 @@
 | 
				
			|||||||
 | 
					<script>
						// Id of the article to show; set by the parent component.
						export let article_id;

						/**
						 * Load one article from the backend.
						 *
						 * Resolves with the parsed JSON body. Rejects on network failure, on any
						 * non-2xx status and on an unparsable body, so that the {:catch} branch of
						 * the await block is shown instead of rendering an error payload as data.
						 */
						async function fetchArticle(id) {
							const response = await fetch('/api/article/' + id + '/get');
							if (!response.ok) {
								throw new Error('Fetching article ' + id + ' failed with HTTP ' + response.status);
							}
							return response.json();
						}

						// Promise consumed by the {#await Article} block in the markup.
						const Article = fetchArticle(article_id);
					</script>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<div class="mockup-window border bg-base-300">
 | 
				
			||||||
 | 
					    <h1 class="center">Article overview</h1>
 | 
				
			||||||
 | 
					    <ul tabindex="0" class="menu p-2 shadow bg-base-100 rounded-box w-52">
 | 
				
			||||||
 | 
					        {#await Article}
 | 
				
			||||||
 | 
					            <li>...waiting</li>
 | 
				
			||||||
 | 
					        {:then data}
 | 
				
			||||||
 | 
					            <li><a href="#">{data.value}</a></li>
 | 
				
			||||||
 | 
					            <li><a href="#">Item 2</a></li>
 | 
				
			||||||
 | 
					        {:catch error}
 | 
				
			||||||
 | 
					            <li>An error occurred!</li>
 | 
				
			||||||
 | 
					        {/await}
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					    </ul>
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					</div>
 | 
				
			||||||
							
								
								
									
										69
									
								
								news_check/client/src/PDFView.svelte
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								news_check/client/src/PDFView.svelte
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,69 @@
 | 
				
			|||||||
 | 
					<!--
						Displays the article PDF through the browser's built-in viewer.
						An earlier pdf.js/canvas prototype (with paging and zoom buttons) used to be kept
						here as commented-out code; it was removed and can be recovered from version control.
					-->
					<script>
						// Path of the PDF to display, resolved relative to the page URL.
						let pdf_file = 'test.pdf';
					</script>

					<div class="mockup-window border bg-base-300 h-full w-full">
						<!-- The explicit MIME type lets the browser pick its PDF handler without sniffing;
						     the link inside <object> is shown only when the PDF cannot be embedded. -->
						<object class="pdf-view" data="{pdf_file}" type="application/pdf" title="Article PDF">
							<a href="{pdf_file}">Open the article PDF</a>
						</object>
					</div>

					<style>
						/* Let the embedded viewer fill the surrounding window frame */
						.pdf-view {
							width: 100%;
							height: 100%;
						}
					</style>
 | 
				
			||||||
							
								
								
									
										10
									
								
								news_check/client/src/main.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								news_check/client/src/main.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,10 @@
 | 
				
			|||||||
 | 
					import App from './App.svelte';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Mount the root component onto <body>. App.svelte declares no props, so none
					// are passed (the template's leftover `name` prop only produced an
					// "unknown prop" warning in dev builds).
					const app = new App({
						target: document.body
					});

					export default app;
 | 
				
			||||||
							
								
								
									
										4
									
								
								news_check/requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								news_check/requirements.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
				
			|||||||
 | 
					flask
 | 
				
			||||||
 | 
					peewee
 | 
				
			||||||
 | 
					markdown
 | 
				
			||||||
 | 
					psycopg2
 | 
				
			||||||
							
								
								
									
										38
									
								
								news_check/server/app.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								news_check/server/app.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					import json
 | 
				
			||||||
 | 
					from flask import Flask, send_from_directory, jsonify
 | 
				
			||||||
 | 
					import random
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					app = Flask(__name__)

					# Directory containing the client's index.html and its build output.
					# send_from_directory resolves a relative directory against the app's root path.
					CLIENT_PUBLIC_DIR = '../client/public'


					###############################################################################
					# SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm)

					@app.route("/") #index.html
					def base():
					    """Serve the entry document of the single-page client."""
					    return send_from_directory(CLIENT_PUBLIC_DIR, 'index.html')

					@app.route("/<path:path>") #js-files
					def home(path):
					    """Serve any other requested file from the client's public directory."""
					    return send_from_directory(CLIENT_PUBLIC_DIR, path)


					###############################################################################
					# API for news_check.

					# NOTE: the view parameter must be called `id` because it is bound to the
					# `<int:id>` URL converter; it shadows the builtin only inside these views.
					@app.route("/api/article/<int:id>/get")
					def get_article(id):
					    """Return a JSON object for the article; currently it only echoes the id as {"value": id}."""
					    res = {"value": id}
					    return jsonify(res)

					@app.route("/api/article/<int:id>/set", methods=['POST'])
					def set_article(id):
					    """Return a random integer in [0, 100] as text; the posted data and `id` are not used."""
					    return str(random.randint(0, 100))


					if __name__ == "__main__":
					    # Flask documents `port` as an int; pass a number instead of the string "80".
					    app.run(host="0.0.0.0", port=80)
 | 
				
			||||||
							
								
								
									
										20
									
								
								news_check/test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								news_check/test.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,20 @@
 | 
				
			|||||||
 | 
					import os

					import peewee

					# Connection settings are read from the environment so that real credentials do
					# not have to be committed. The fallbacks are the previous hard-coded values, so
					# running the script without any variables set behaves as before.
					db = peewee.PostgresqlDatabase(
					    os.environ.get('DB_NAME', 'coss_archiving'),
					    user=os.environ.get('DB_USER', 'ca_rw'),
					    password=os.environ.get('DB_PASSWORD', 'pleasechangeit'),
					    host=os.environ.get('DB_HOST', 'vpn'),
					    port=int(os.environ.get('DB_PORT', '5432')),
					)


					class Pet(peewee.Model):
					    """Minimal model used to try out table creation, inserts and selects."""

					    name = peewee.CharField()
					    animal_type = peewee.CharField()

					    class Meta:
					        database = db  # bind the model to the PostgreSQL database defined above


					# Run every statement inside the connection context. Previously only
					# create_tables was inside it and the later queries relied on peewee silently
					# re-opening a connection that was then never closed.
					with db:
					    db.create_tables([Pet])
					    db.get_tables()

					    t = Pet.create(name="Test", animal_type="test")

					    for pet in Pet.select():
					        print(pet.name)
 | 
				
			||||||
@@ -1,2 +1,2 @@
 | 
				
			|||||||
.dev/
 | 
					Dockerfile
 | 
				
			||||||
__pycache__/
 | 
					__pycache__/
 | 
				
			||||||
@@ -3,25 +3,18 @@ FROM python:latest
 | 
				
			|||||||
ENV TZ Europe/Zurich
 | 
					ENV TZ Europe/Zurich
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RUN apt-get update && apt-get install -y \
 | 
					 | 
				
			||||||
evince \
 | 
					 | 
				
			||||||
# for checking
 | 
					 | 
				
			||||||
xauth \
 | 
					 | 
				
			||||||
#for gui
 | 
					 | 
				
			||||||
ghostscript
 | 
					 | 
				
			||||||
# for compression
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
RUN useradd --create-home --shell /bin/bash --uid 1001 autonews
 | 
					RUN useradd --create-home --shell /bin/bash --uid 1001 autonews
 | 
				
			||||||
# id mapped to local user
 | 
					# id mapped to local user
 | 
				
			||||||
# home directory needed for pip package installation
 | 
					# home directory needed for pip package installation
 | 
				
			||||||
 | 
					# `RUN export ...` only changes the environment of that single build step;
					# ENV persists the user-local pip bin directory on PATH for later steps and at runtime.
					ENV PATH=/home/autonews/.local/bin:$PATH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RUN mkdir -p /app/auto_news
 | 
					RUN mkdir -p /app/auto_news
 | 
				
			||||||
RUN chown -R autonews:autonews /app
 | 
					RUN chown -R autonews:autonews /app
 | 
				
			||||||
USER autonews
 | 
					USER autonews
 | 
				
			||||||
RUN export PATH=/home/autonews/.local/bin:$PATH
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
COPY requirements.txt /app/requirements.txt
 | 
					COPY requirements.txt /app/requirements.txt
 | 
				
			||||||
RUN python3 -m pip install -r /app/requirements.txt
 | 
					RUN python3 -m pip install -r /app/requirements.txt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
COPY app /app/auto_news
 | 
					COPY . /app/auto_news
 | 
				
			||||||
WORKDIR /app/auto_news
 | 
					WORKDIR /app/auto_news
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,59 +0,0 @@
 | 
				
			|||||||
from dataclasses import dataclass
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
import configparser
 | 
					 | 
				
			||||||
import logging
 | 
					 | 
				
			||||||
from datetime import datetime
 | 
					 | 
				
			||||||
from peewee import SqliteDatabase
 | 
					 | 
				
			||||||
from rich.logging import RichHandler
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# first things first: logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
    format='%(message)s',
 | 
					 | 
				
			||||||
    level=logging.INFO,
 | 
					 | 
				
			||||||
    datefmt='%H:%M:%S', # add %Y-%m-%d if needed
 | 
					 | 
				
			||||||
    handlers=[RichHandler()]
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Read the INI file that holds all constants and secrets.
parsed = configparser.ConfigParser()
parsed.read("/app/containerdata/config/news_fetch.config.ini")

_DB_NAME_KEYS = ("chat_db_name", "download_db_name")

if os.getenv("DEBUG", "false") != "true":
    logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...")
    db_base_path = parsed["DATABASE"]["db_path_prod"]

    # Copy both production databases to the backup folder, prefixed with today's date.
    logger.info("Backing up databases")
    backup_dst = parsed["DATABASE"]["db_backup"]
    today = datetime.today().strftime("%Y.%m.%d")
    for _db_key in _DB_NAME_KEYS:
        _db_file = parsed["DATABASE"][_db_key]
        shutil.copyfile(
            os.path.join(db_base_path, _db_file),
            os.path.join(backup_dst, today + "." + _db_file),
        )
else:
    logger.warning("Found 'DEBUG=true', setting up dummy databases")

    # In debug mode the dev database folder is used and the Slack channel,
    # mail recipient and download folder are redirected to their debug values.
    db_base_path = parsed["DATABASE"]["db_path_dev"]
    parsed["SLACK"]["archive_id"] = parsed["SLACK"]["debug_id"]
    parsed["MAIL"]["recipient"] = parsed["MAIL"]["sender"]
    parsed["DOWNLOADS"]["local_storage_path"] = parsed["DATABASE"]["db_path_dev"]


from utils_storage import models

# Hand both databases (chat first, downloads second) to the models module.
# WAL journal mode: multiple threads can read at once.
models.set_db(
    *[
        SqliteDatabase(
            os.path.join(db_base_path, parsed["DATABASE"][_db_key]),
            pragmas={'journal_mode': 'wal'},
        )
        for _db_key in _DB_NAME_KEYS
    ]
)
 | 
					 | 
				
			||||||
@@ -1,285 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
import configuration
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import time
 | 
					 | 
				
			||||||
from threading import Thread
 | 
					 | 
				
			||||||
from slack_sdk.errors import SlackApiError
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					 | 
				
			||||||
config = configuration.parsed["SLACK"]
 | 
					 | 
				
			||||||
models = configuration.models
 | 
					 | 
				
			||||||
slack_client = "dummy"
 | 
					 | 
				
			||||||
LATEST_RECORDED_REACTION = 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def init(client) -> None:
    """Store the Slack client and catch up on everything missed while offline.

    The id of the newest reaction already in the database is remembered in
    ``LATEST_RECORDED_REACTION`` so that reactions fetched afterwards can be
    told apart from the ones handled earlier. Missed channel messages are
    fetched first (blocking), the reaction fetch is launched in a daemon
    thread, and the thread replies are fetched last (blocking).

    Args:
        client: Slack web client used for all API calls of this module.

    Returns:
        None. The background thread is not handed back to the caller.
    """
    global slack_client, LATEST_RECORDED_REACTION
    slack_client = client

    try:
        # Order by the actual column (a plain string would be treated as a
        # constant) and keep the integer id, not the model instance.
        newest = models.Reaction.select(models.Reaction.id).order_by(models.Reaction.id)[-1]
        LATEST_RECORDED_REACTION = newest.id
    except IndexError:  # query is empty, no reaction was ever recorded
        LATEST_RECORDED_REACTION = 0

    # fetch all the messages we could have possibly missed
    logger.info("Querying missed messages, threads and reactions. This can take some time.")
    fetch_missed_channel_messages()  # not threaded

    # threaded, runs in background (usually takes a long time)
    t = Thread(target=fetch_missed_channel_reactions, daemon=True)
    t.start()

    if os.getenv("REDUCEDFETCH", "false") == "true":
        logger.warning("Only fetching empty threads for bot messages because 'REDUCEDFETCH=true'")
        fetch_missed_thread_messages(reduced=True)
    else:
        # full fetch; runs while the reaction thread works in the background
        fetch_missed_thread_messages()
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_unhandled_messages():
    """Collect the messages and reactions stored in the db that still need handling.

    The message handler makes no distinction between channel messages and
    thread messages, so both kinds are returned in one list.

    Returns:
        tuple: ``(messages, reactions)`` where ``messages`` holds the channel
        messages followed by the thread messages, and ``reactions`` holds the
        reactions whose id is greater than ``LATEST_RECORDED_REACTION``.
    """
    # A thread with a single message only holds the channel message, so only
    # threads with more messages can have a pending reply.
    latest_replies = (
        thread.last_message
        for thread in models.Thread.select()
        if thread.message_count > 1
    )
    # Replies that were not written by a human need no processing.
    threaded_objects = [reply for reply in latest_replies if reply.is_by_human]
    logger.info(f"Set {len(threaded_objects)} thread-messages as not yet handled.")

    channel_objects = []
    for thread in models.Thread.select():
        if thread.message_count == 1 and not thread.is_fully_processed:
            channel_objects.append(thread.initiator_message)
    logger.info(f"Set {len(channel_objects)} channel-messages as not yet handled.")

    # Only the reactions newer than the last one known before the fetch.
    newer_reactions = models.Reaction.select().where(models.Reaction.id > LATEST_RECORDED_REACTION)
    reaction_objects = list(newer_reactions)
    logger.info(f"Set {len(reaction_objects)} reactions as not yet handled.")

    return channel_objects + threaded_objects, reaction_objects
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def fetch_missed_channel_messages():
    """Fetch every channel message newer than the latest one stored in the db.

    The newest stored message (ordered by ``ts``) provides the ``oldest``
    bound for the history query; with an empty table the bound is 0. Every
    returned message is passed to ``message_dict_to_model``. While the
    response reports ``has_more``, the next page is requested through the
    response cursor. A ``SlackApiError`` during pagination is treated as a
    rate limit: the function sleeps ``api_wait_time`` seconds and requests the
    same page again.

    Raises:
        SlackApiError: if the very first history request fails.
    """
    presaved = models.Message.select().order_by(models.Message.ts)
    if not presaved:
        last_ts = 0
    else:
        last_ts = presaved[-1].slack_ts

    result = slack_client.conversations_history(
        channel=config["archive_id"],
        oldest=last_ts
    )

    # NOTE(review): the message at `oldest` itself may be returned again;
    # message_dict_to_model goes through get_or_create, so it is presumably
    # harmless - confirm.
    new_fetches = 0
    for m in result.get("messages", []):
        message_dict_to_model(m)
        new_fetches += 1

    refetch = result.get("has_more", False)
    while refetch:  # we have not actually fetched them all
        try:
            result = slack_client.conversations_history(
                channel=config["archive_id"],
                cursor=result["response_metadata"]["next_cursor"],
                oldest=last_ts
            )
        except SlackApiError:  # Most likely a rate-limit
            logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"]))
            # config values are strings; time.sleep needs a number
            time.sleep(int(config["api_wait_time"]))
            # `result` still holds the previous page, so the same cursor is retried
            continue

        refetch = result.get("has_more", False)
        for m in result.get("messages", []):
            message_dict_to_model(m)
            new_fetches += 1

    logger.info(f"Fetched {new_fetches} new channel messages.")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def fetch_missed_thread_messages(reduced=False):
    """After having gotten all base-threads, fetch the replies of each of them.

    Args:
        reduced: when true, only threads that consist of a single message and
            are not fully processed are queried. Those are the completely
            empty threads, which might be because the bot-message was not yet
            saved to the db. Otherwise every thread that is not fully
            processed is queried.

    Raises:
        SlackApiError: if a query fails again after the single retry.
    """

    def _query_replies(thread):
        # Ask Slack for the messages of `thread`, bounded by its latest stored message.
        return slack_client.conversations_replies(
            channel=config["archive_id"],
            ts=thread.slack_ts,
            oldest=thread.messages[-1].slack_ts
        )["messages"]

    # There is no known better way: this has to be fetched for each and every
    # thread (except if it is marked as permanently solved).
    logger.info("Starting fetch of thread messages...")
    if reduced:
        threads = [t for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)]
    else:
        threads = [t for t in models.Thread.select() if not t.is_fully_processed]
    logger.info(f"Fetching history for {len(threads)} empty threads")

    new_messages = []
    for i, t in enumerate(threads):
        try:
            messages = _query_replies(t)
        except SlackApiError:
            logger.error("Hit rate limit while querying threaded messages, retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
            time.sleep(int(config["api_wait_time"]))
            messages = _query_replies(t)  # one retry; a second failure propagates

        # The first message is the one posted in the channel, which was
        # already processed. Slicing (unlike pop(0)) tolerates an empty reply list.
        for m in messages[1:]:
            # only append *new* messages
            res = message_dict_to_model(m)
            if res:
                new_messages.append(res)
    logger.info("Fetched {} new threaded messages.".format(len(new_messages)))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def fetch_missed_channel_reactions():
    """Query the reactions of every thread that is not fully processed and store them.

    A ``message_not_found`` error marks the initiator message of the thread
    as processed (override flag) so it is not considered pending any longer.
    Any other ``SlackApiError`` is treated as a rate limit: the function
    sleeps ``api_wait_time`` seconds and continues with the next thread; the
    current thread is not queried again in this pass.
    """
    logger.info("Starting background fetch of channel reactions...")
    threads = [t for t in models.Thread.select() if not t.is_fully_processed]
    for i, t in enumerate(threads):
        reactions = []
        try:
            query = slack_client.reactions_get(
                channel=config["archive_id"],
                timestamp=t.slack_ts
            )
            # The inner default must be a dict: a list has no .get(), so a
            # response without "message" would raise AttributeError.
            reactions = query.get("message", {}).get("reactions", [])
        except SlackApiError as e:
            if e.response.get("error", "") == "message_not_found":
                m = t.initiator_message
                logger.warning(f"Message (id={m.id}) not found. Skipping and saving...")
                # this usually means the message is past the 1000 message limit imposed by slack. Mark it as processed in the db
                m.is_processed_override = True
                m.save()
            else:  # probably a rate_limit:
                logger.error("Hit rate limit while querying reactions. retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
                time.sleep(int(config["api_wait_time"]))

        for r in reactions:
            reaction_dict_to_model(r, t)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Helpers for message conversion to db-objects
 | 
					 | 
				
			||||||
def reaction_dict_to_model(reaction, thread=None):
    """Convert a reaction dict into a stored ``Reaction`` model.

    Args:
        reaction: reaction payload. The reaction name is read from the
            ``"name"`` key (manual api query) or from the ``"reaction"`` key
            (event payload).
        thread: thread the reaction belongs to. When omitted it is looked up
            through the message whose ``ts`` equals ``reaction["item"]["ts"]``.

    Returns:
        The ``Reaction`` attached to the initiator message of the thread
        (fetched or created), or ``None`` when the payload carries neither key.
    """
    if thread is None:
        item_ts = reaction["item"]["ts"]
        thread = models.Message.get(ts=float(item_ts)).thread

    content = None
    for key in ("name", "reaction"):  # api query first, then event payload
        if key in reaction.keys():
            content = reaction[key]
            break
    else:
        logger.error(f"Weird reaction received: {reaction}")
        return None

    stored, _created = models.Reaction.get_or_create(
        type=content,
        message=thread.initiator_message,
    )
    logger.info("Saved reaction [{}]".format(content))
    return stored
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def message_dict_to_model(message):
    """Convert a Slack message dict into db objects (user, thread, message).

    Args:
        message: message payload; ``"type"``, ``"ts"`` and ``"text"`` are
            read directly, ``"thread_ts"``, ``"user"`` and ``"files"`` are
            optional.

    Returns:
        The ``Message`` model when it was newly created, otherwise ``None``
        (message already stored, payload without user, or a payload whose
        type is not ``"message"``).
    """
    if message["type"] != "message":
        logger.warning("What should I do of {}".format(message))
        return None

    # A message outside of a thread acts as its own thread root.
    if "thread_ts" in message:
        thread_ts = message["thread_ts"]
    else:
        thread_ts = message["ts"]

    uid = message.get("user", "BAD USER")
    if uid == "BAD USER":
        logger.critical("Message has no user?? {}".format(message))
        return None

    user, _ = models.User.get_or_create(user_id=uid)
    thread, _ = models.Thread.get_or_create(thread_ts=thread_ts)
    m, new = models.Message.get_or_create(
        user=user,
        thread=thread,
        ts=message["ts"],
        channel_id=config["archive_id"],
        text=message["text"],
    )
    state = 'new' if new else 'old'
    logger.info(f"Saved: {m} ({state})")

    # Only the first attached file is recorded on the message.
    files = message.get("files", [])
    if len(files) > 0:
        first_file = files[0]
        m.file_type = first_file["filetype"]
        m.perma_link = first_file["url_private_download"]
        m.save()
        logger.info(f"Saved {m.file_type}-file for message (id={m.id})")

    return m if new else None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def say_substitute(*args, **kwargs):
    """Post a message to the archive channel through the stored Slack client.

    Args:
        *args: text fragments, joined with ``" - "`` to form the message text.
        **kwargs: forwarded unchanged to ``chat_postMessage`` (e.g. ``thread_ts``).
    """
    text = " - ".join(args)
    logger.info("Now sending message through say-substitute: {}".format(text))
    slack_client.chat_postMessage(channel=config["archive_id"], text=text, **kwargs)
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def save_as_related_file(url, article_object):
    """Download ``url`` with the bot token and register it as a related file.

    The file is written into ``article_object.save_path``. Its name is built
    from the article's file name (without ``.pdf``), a running number and the
    text after the last dot of ``url``.

    Args:
        url: download link of the file.
        article_object: article the file gets attached to via ``set_related``.

    Returns:
        The file name the download was saved under.
    """
    auth_headers = {"Authorization": "Bearer {}".format(slack_client.token)}
    response = requests.get(url, headers=auth_headers)

    target_dir = article_object.save_path
    extension = url.rsplit(".", 1)[-1]  # everything after the last dot
    base_name = article_object.file_name.replace(".pdf", "")
    running_no = len(article_object.related) + 1
    fname = f"{base_name} - related no {running_no}.{extension}"

    with open(os.path.join(target_dir, fname), "wb") as out:
        out.write(response.content)

    article_object.set_related([fname])
    logger.info("Added {} to model {}".format(fname, article_object))
    return fname
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def react_file_path_message(fname, article_object):
    """Register ``fname`` as related file if it exists in the article's save path.

    Args:
        fname: file name, relative to ``article_object.save_path``.
        article_object: article the file gets attached to via ``set_related``.

    Returns:
        ``True`` when the file exists and was registered, ``False`` otherwise.
    """
    candidate = os.path.join(article_object.save_path, fname)
    if not os.path.exists(candidate):
        return False

    article_object.set_related([fname])
    logger.info("Added {} to model {}".format(fname, article_object))
    return True
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_message_in_archiving(message) -> bool:
    """Tell whether a message (event dict or model) belongs to the archive channel."""
    channel = message["channel"] if isinstance(message, dict) else message.channel_id
    return channel == config["archive_id"]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_reaction_in_archiving(event) -> bool:
    """Tell whether a reaction (event dict or model) belongs to the archive channel."""
    if not isinstance(event, dict):
        # model instance: the channel is stored on the message it refers to
        channel = event.message.channel_id
    else:
        channel = event["item"]["channel"]
    return channel == config["archive_id"]
 | 
					 | 
				
			||||||
@@ -1,189 +0,0 @@
 | 
				
			|||||||
from slack_bolt import App
 | 
					 | 
				
			||||||
from slack_bolt.adapter.socket_mode import SocketModeHandler
 | 
					 | 
				
			||||||
from slack_sdk.errors import SlackApiError
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import logging
 | 
					 | 
				
			||||||
import configuration
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from . import message_helpers
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
config = configuration.parsed["SLACK"]
 | 
					 | 
				
			||||||
models = configuration.models
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class BotApp(App):
    """Slack app that routes archive-channel messages and reactions.

    New channel messages containing urls are handed to ``callback``; replies
    inside a thread are interpreted as related files for the thread's article;
    reactions set the verification status of the thread's article.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, callback, *args, **kwargs):
        """Create the app.

        Args:
            callback: callable invoked with the message model of every new
                channel message that contains at least one url.
            *args, **kwargs: forwarded to ``App.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.callback = callback

    def pre_start(self):
        """Fetch what was missed while offline, handle it, then log a status line."""
        message_helpers.init(self.client)
        missed_messages, missed_reactions = message_helpers.get_unhandled_messages()

        for m in missed_messages:
            self.handle_incoming_message(m)
        for r in missed_reactions:
            self.handle_incoming_reaction(r)

        self.startup_status()

    def handle_incoming_reaction(self, reaction):
        """Apply a reaction to the article of the thread it was added to.

        Args:
            reaction: event dict or ``Reaction`` model. A dict is converted
                to a model first.

        ``white_check_mark`` sets the article's ``verified`` to 1, every other
        reaction sets it to -1. Nothing happens when the dict could not be
        converted or when the thread has no article.
        """
        if isinstance(reaction, dict):  # else: the reaction is already being passed as a model
            reaction = message_helpers.reaction_dict_to_model(reaction)
            if reaction is None:
                # the helper already logged why the payload was unusable
                return

        thread = reaction.message.thread
        article_object = thread.article
        if article_object is not None:
            status = 1 if reaction.type == "white_check_mark" else -1
            article_object.verified = status
            article_object.save()

    def handle_incoming_message(self, message):
        """Reacts to all messages inside channel archiving. Must then
        distinguish between threaded replies and new requests
        and react accordingly.

        Args:
            message: event dict or ``Message`` model. A dict is converted to
                a model first.

        Returns:
            ``False`` when the message is skipped (a ``message_changed``
            event, or a dict that did not yield a new model), otherwise ``None``.
        """
        if isinstance(message, dict):  # else: the message is already being passed as a model
            # CAUTION: filter for 'changed messages' those are nasty (usually when adding an url)
            if message.get("subtype", "not bad") == "message_changed":
                return False
            message = message_helpers.message_dict_to_model(message)
            if message is None:
                # already stored, without user, or not of type "message":
                # there is no model to dispatch
                return False

        # First check: belongs to thread?
        is_threaded = message.thread.message_count > 1 and message != message.thread.initiator_message
        if is_threaded:
            self.incoming_thread_message(message)
        else:
            self.incoming_channel_message(message)

    def incoming_thread_message(self, message):
        """Handle a reply inside a thread, unless it is the bot's or the thread is done.

        Returns:
            ``True`` when the message is ignored, otherwise ``None``.
        """
        if message.user.user_id == config["bot_id"]:
            return True  # ignore the files uploaded by the bot. We handled them already!

        thread = message.thread
        if thread.is_fully_processed:
            return True

        self.logger.info("Receiving thread-message")
        self.respond_thread_message(message)

    def incoming_channel_message(self, message):
        """Handle a root message of the channel and pass it to ``callback`` when it has urls."""
        self.logger.info(f"Handling message {message} ({len(message.urls)} urls)")

        if not message.urls:  # no urls in a root-message => IGNORE
            message.is_processed_override = True
            message.save()
            return

        # ensure thread is still empty, this is a scenario encountered only in testing, but let's just filter it
        if message.thread.message_count > 1:
            self.logger.info("Discarded message because it is actually processed.")
            return

        if len(message.urls) > 1:
            message_helpers.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.thread.slack_ts)

        self.callback(message)

    def respond_thread_message(self, message, say=message_helpers.say_substitute):
        """Store the file referenced by a thread reply and answer in the thread.

        Args:
            message: reply model. With a ``perma_link`` the file is
                downloaded; otherwise the message text is tried as a file
                name inside the article's save path.
            say: callable used to post the answer.
        """
        thread = message.thread
        article = thread.article
        if message.perma_link:  # file upload means new data
            fname = message_helpers.save_as_related_file(message.perma_link, article)
            say("File was saved as 'related file' under `{}`.".format(fname),
                thread_ts=thread.slack_ts
            )
        else:  # either a pointer to a new file (too large to upload), or trash
            success = message_helpers.react_file_path_message(message.text, article)
            if success:
                say("File was saved as 'related file'", thread_ts=thread.slack_ts)
            else:
                self.logger.error("User replied to thread {} but the response did not contain a file/path".format(thread))
                say("Cannot process response without associated file.",
                    thread_ts=thread.slack_ts
                )

    def respond_channel_message(self, thread, say=message_helpers.say_substitute):
        """Post every entry of the article's ``slack_info`` into the thread.

        Entries with a ``file_path`` are uploaded with the reply text as
        comment; entries without one are posted as plain text. A failed
        upload is reported in the thread and logged.

        Args:
            thread: thread whose article is reported.
            say: callable used to post text answers.
        """
        article = thread.article
        answers = article.slack_info
        for a in answers:
            if a["file_path"]:
                try:
                    self.client.files_upload(
                        channels=config["archive_id"],
                        initial_comment=f"<@{config['responsible_id']}> \n {a['reply_text']}",
                        file=a["file_path"],
                        thread_ts=thread.slack_ts
                    )
                except SlackApiError as e:  # upload resulted in an error
                    say(
                        "File {} could not be uploaded.".format(a),
                        thread_ts=thread.slack_ts
                    )
                    self.logger.error(f"File upload failed: {e}")
            else:  # anticipated that there is no file!
                say(
                    f"<@{config['responsible_id']}> \n {a['reply_text']}",
                    thread_ts=thread.slack_ts
                )

    def startup_status(self):
        """Log how many threads are processed and how many articles await verification."""
        threads = list(models.Thread.select())
        all_threads = len(threads)
        fully_processed = len([t for t in threads if t.is_fully_processed])
        fully_unprocessed = len([t for t in threads if t.message_count == 1])
        articles_unprocessed = len(models.ArticleDownload.select().where(models.ArticleDownload.verified < 1))
        self.logger.info(f"[bold]STATUS[/bold]: Fully processed {fully_processed}/{all_threads} threads. {fully_unprocessed} threads have 0 replies. Article-objects to verify: {articles_unprocessed}", extra={"markup": True})
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class BotRunner():
    """Thin wrapper that attaches the slack event decorators to a BotApp.

    The decorators are methods of the BotApp instance, so the instance is
    created first and the handlers are registered on it afterwards.
    """

    def __init__(self, callback, *args, **kwargs) -> None:
        # additional positional/keyword arguments are accepted but not used
        self.bot_worker = BotApp(callback, token=config["auth_token"])

        # messages: forwarded to the worker when the matcher accepts them
        @self.bot_worker.event(event="message", matchers=[message_helpers.is_message_in_archiving])
        def handle_incoming_message(message, say):
            return self.bot_worker.handle_incoming_message(message)

        # added reactions: same forwarding scheme
        @self.bot_worker.event(event="reaction_added", matchers=[message_helpers.is_reaction_in_archiving])
        def handle_incoming_reaction(event, say):
            return self.bot_worker.handle_incoming_reaction(event)

        self.handler = SocketModeHandler(self.bot_worker, config["app_token"])

    def start(self):
        """Run the worker's ``pre_start`` hook, then start the socket mode handler."""
        self.bot_worker.pre_start()
        self.handler.start()

    def stop(self):
        """Close the socket mode handler and print a goodbye message."""
        self.handler.close()
        print("Bye handler!")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # def respond_to_message(self, message):
 | 
					 | 
				
			||||||
    #     self.bot_worker.handle_incoming_message(message)
 | 
					 | 
				
			||||||
@@ -1,331 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from peewee import *
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import markdown
 | 
					 | 
				
			||||||
import re
 | 
					 | 
				
			||||||
import configuration
 | 
					 | 
				
			||||||
import datetime
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# configuration sections read by the models in this module
config = configuration.parsed["DOWNLOADS"]
slack_config = configuration.parsed["SLACK"]

## Helpers
# Proxies stand in for the real databases; set_db() initializes them,
# which sets the nature of the db at runtime.
chat_db = DatabaseProxy()
download_db = DatabaseProxy()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class DownloadBaseModel(Model):
    """Common base for all models bound to the ``download_db`` proxy."""

    class Meta:
        database = download_db
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ChatBaseModel(Model):
    """Common base for all models bound to the ``chat_db`` proxy."""

    class Meta:
        database = chat_db
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## == Article related models == ##
 | 
					 | 
				
			||||||
class ArticleDownload(DownloadBaseModel):
    """An article to download (or already downloaded) together with its metadata.

    Authors, keywords, references and related files are stored in separate
    tables and are reachable through the ``authors``, ``keywords``,
    ``references`` and ``related`` back-references.
    """
    title = CharField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=datetime.date.today)
    source_name = CharField(default='')
    article_url = TextField(default='', unique=True)
    archive_url = TextField(default='')
    file_name = TextField(default='')
    language = CharField(default='')
    summary = TextField(default='')
    comment = TextField(default='')
    # slack_info picks its status line with ``verified + 1`` from a list of
    # three entries, i.e. it can render the values -1, 0 and 1.
    verified = IntegerField(default=0)
    # authors
    # keywords
    # ... are added through foreignkeys

    def __str__(self) -> str:
        if self.title != '' and self.source_name != '':
            desc = f"{shorten_name(self.title)} -- {self.source_name}"
        else:
            desc = f"{self.article_url}"
        return f"ART [{desc}]"

    ## Useful Properties
    @property
    def save_path(self):
        """Local directory (with trailing slash) derived from the download year and month name."""
        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    def fname_nas(self, file_name=""):
        """Return the "NAS: ..." location string for a file of this article.

        Args:
            file_name: name to report; when empty, the article's own
                ``file_name`` is used instead.

        Returns:
            The location string, or None when the article has no download date.
        """
        if not self.download_date:
            return None
        return "NAS: {}/{}/{}/{}".format(
            config["remote_storage_path"],
            self.download_date.year,
            self.download_date.strftime("%B"),
            file_name or self.file_name,
        )

    @property
    def fname_template(self):
        """File name built from source and title; ".pdf" is appended unless the source is youtube."""
        is_youtube = "youtube.com" in self.source_name or "youtu.be" in self.source_name
        fname = "{} -- {}".format(self.source_name, self.title)
        if not is_youtube:
            fname += ".pdf"
        return clear_path_name(fname)

    @property
    def is_title_bad(self):  # add incrementally
        """True when the title contains one of the known placeholder/error markers."""
        bad_markers = ("PUR-Abo", "Redirecting", "Error while running fetch")
        return any(marker in self.title for marker in bad_markers)

    @property
    def slack_info(self):
        """Build the list of replies (``reply_text`` / ``file_path`` dicts) for this article.

        When ``file_status`` reports a problem, the list holds only its
        failure message. Otherwise the first entry describes the main file
        and one further entry is appended per related pdf.
        """
        status = [":x: No better version available", ":gear: Verification pending", ":white_check_mark: Verified by human"][self.verified + 1]
        content = "\n>" + "\n>".join(self.summary.split("\n"))
        file_status, msg = self.file_status()
        if not file_status:
            return [msg]

        # everything alright: generate real content
        # first the base file
        if self.file_name.endswith(".pdf"):
            answer = [{  # main reply, the pdf gets attached
                "reply_text": f"*{self.title}*\n{status}\n{content}",
                "file_path": self.save_path + self.file_name
            }]
        else:  # anything but a pdf is not uploaded, only its NAS location is mentioned
            location = "Not uploaded to slack, but the file will be on the NAS:\n`{}`".format(self.fname_nas())
            answer = [{  # main reply without attachment
                "reply_text": "*{}*\n{}\n{}\n{}".format(self.title, status, content, location),
                "file_path": None
            }]

        # then the related files
        rel_text = ""
        for r in self.related:
            fname = r.related_file_name
            lentry = "\n• `{}` ".format(self.fname_nas(fname))
            if fname.endswith(".pdf"):  # this is a manageable file, directly upload
                answer.append({"reply_text": "", "file_path": self.save_path + fname})
            else:  # not a pdf: don't upload but mention its existence
                lentry += "(not uploaded to slack, but the file will be on the NAS)"
            rel_text += lentry

        if rel_text:
            answer[0]["reply_text"] += "\nRelated files:\n" + rel_text

        return answer

    @property
    def mail_info(self):
        """Like ``slack_info``, prefixed with a link to the article and rendered through markdown."""
        base = [{"reply_text": "[{}]({})\n".format(self.article_url, self.article_url), "file_path": None}] + self.slack_info
        return [{"reply_text": markdown.markdown(m["reply_text"]), "file_path": m["file_path"]} for m in base]

    ## Helpers
    def set_keywords(self, keywords):
        """Create one ArticleKeyword row per entry of ``keywords``."""
        for k in keywords:
            ArticleKeyword.create(
                article=self,
                keyword=k
            )

    def set_authors(self, authors):
        """Create one ArticleAuthor row per entry of ``authors``."""
        for a in authors:
            ArticleAuthor.create(
                article=self,
                author=a
            )

    def set_references(self, references):
        """Create one ArticleReference row per entry of ``references``."""
        for r in references:
            ArticleReference.create(
                article=self,
                reference_url=r
            )

    def set_related(self, related):
        """Create one ArticleRelated row per entry of ``related``."""
        for r in related:
            ArticleRelated.create(
                article=self,
                related_file_name=r
            )

    def file_status(self):
        """Check that the article has a file name and that the file exists locally.

        Returns:
            ``(True, {})`` when the file is present, otherwise ``(False, msg)``
            where ``msg`` is a reply dict describing the problem.
        """
        if not self.file_name:
            logger.error("Article {} has no filename!".format(self))
            return False, {"reply_text": "Download failed, no file was saved.", "file_path": None}

        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            logger.error("Article {} has a filename, but the file does not exist at that location!".format(self))
            return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None}

        return True, {}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ArticleKeyword(DownloadBaseModel):
    """A single keyword of an article; one row per keyword keeps the amount flexible."""
    article = ForeignKeyField(ArticleDownload, backref='keywords')
    keyword = CharField()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ArticleAuthor(DownloadBaseModel):
    """A single author of an article, reachable through ``article.authors``."""
    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ArticleReference(DownloadBaseModel):
    """A single reference url of an article, reachable through ``article.references``."""
    article = ForeignKeyField(ArticleDownload, backref='references')
    reference_url = TextField(default='')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ArticleRelated(DownloadBaseModel):
    """File name of one related file of an article, reachable through ``article.related``."""
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## == Slack-thread related models == ##
 | 
					 | 
				
			||||||
class User(ChatBaseModel):
    """A chat user, identified by a unique ``user_id``."""
    user_id = CharField(default='', unique=True)
    # messages: back-reference declared by Message.user
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Thread(ChatBaseModel):
    """The threads that concern us are only created if the base message contains a url"""
    thread_ts = FloatField(default=0)
    article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None)
    # provides, ts, user, models
    # messages

    @property
    def slack_ts(self):
        """``thread_ts`` as a string, right-padded with zeros up to 6 decimals."""
        raw = str(self.thread_ts)
        decimals = len(raw) - raw.find(".") - 1
        # usually there are 6 decimals; pad when the float lost trailing zeros
        return raw + (6 - decimals) * "0"

    @property
    def initiator_message(self):
        """First entry of ``messages``, or None (with a warning) when there is none."""
        try:
            return self.messages[0]  # TODO check if this needs sorting
        except IndexError:
            logger.warning(f"Thread {self} is empty. How can that be?")
            return None

    @property
    def message_count(self):
        """Number of messages attached to this thread."""
        return self.messages.count()

    @property
    def last_message(self):
        """Message of this thread with the highest ``ts``."""
        # can't be empty by definition/creation
        ordered = Message.select().where(Message.thread == self).order_by(Message.ts)
        return ordered[-1]

    @property
    def is_fully_processed(self) -> bool:
        """Whether the thread needs no further handling.

        True when the initiator message carries the processed override, or
        when its first reaction is "white_check_mark" or "x".
        """
        first_message = self.initiator_message
        if first_message is None:
            return False

        # this override is set for instance, when no url was sent at all. Then set this thread to be ignored
        if first_message.is_processed_override:
            return True

        reactions = first_message.reaction
        if not reactions:
            return False

        # can and should only have one reaction
        return reactions[0].type in ("white_check_mark", "x")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
class Message(ChatBaseModel):
    """A single chat message belonging to a thread."""
    ts = FloatField(unique=True)  # for sorting
    channel_id = CharField(default='')
    user = ForeignKeyField(User, backref="messages")
    text = TextField(default='')
    thread = ForeignKeyField(Thread, backref="messages", default=None)
    file_type = CharField(default='')
    perma_link = CharField(default='')
    is_processed_override = BooleanField(default=False)
    # reaction

    def __str__(self) -> str:
        return "MSG [{}]".format(shorten_name(self.text).replace('\n','/'))

    @property
    def slack_ts(self):
        """``ts`` as a string, right-padded with zeros up to 6 decimals."""
        raw = str(self.ts)
        decimals = len(raw) - raw.find(".") - 1
        # usually there are 6 decimals; pad when the float lost trailing zeros
        return raw + (6 - decimals) * "0"

    @property
    def urls(self):
        """All ``<...>`` spans of the text that contain a dot, cut at the first "|"."""
        candidates = re.findall(r"<(.*?)>", self.text)
        # must contain a tld, hence the "." check. Slack abbreviates urls as
        # <url|link preview>; "|" is discouraged in urls, so splitting on it
        # and keeping the first half is "safe".
        return [candidate.split("|")[0] for candidate in candidates if "." in candidate]

    @property
    def is_by_human(self):
        """True unless the author's ``user_id`` equals the configured ``bot_id``."""
        return self.user.user_id != slack_config["bot_id"]

    @property
    def has_single_url(self):
        """True when ``urls`` yields exactly one entry."""
        return len(self.urls) == 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Reaction(ChatBaseModel):
    """A reaction attached to a message, reachable through ``message.reaction``."""
    type = CharField(default="")
    message = ForeignKeyField(Message, backref="reaction")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def create_tables():
    """Create the article tables in ``download_db`` and the chat tables in ``chat_db``."""
    article_models = [ArticleDownload, ArticleKeyword, ArticleAuthor, ArticleReference, ArticleRelated]
    chat_models = [User, Message, Thread, Reaction]

    with download_db:
        download_db.create_tables(article_models)
    with chat_db:
        chat_db.create_tables(chat_models)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def set_db(chat_db_object, download_db_object):
    """Initialize both database proxies with the given databases, then create the tables."""
    bindings = (
        (chat_db, chat_db_object),
        (download_db, download_db_object),
    )
    for proxy, database in bindings:
        proxy.initialize(database)
    create_tables()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def clear_path_name(path):
    """Return ``path`` with every character that is neither alphanumeric nor
    one of space, '.', '_' or '-' replaced by '_'; trailing whitespace is stripped.
    """
    allowed = (' ', '.', '_', '-')

    def sanitize(char):
        if char.isalnum() or char in allowed:
            return char
        return "_"

    return "".join(map(sanitize, path)).rstrip()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def shorten_name(name, offset = 50):
    """Return ``name`` unchanged when it has at most ``offset`` characters,
    otherwise its first ``offset`` characters followed by "...".
    """
    if len(name) <= offset:
        return name
    return name[:offset] + "..."
 | 
					 | 
				
			||||||
							
								
								
									
										66
									
								
								news_fetch/configuration.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								news_fetch/configuration.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,66 @@
 | 
				
			|||||||
 | 
					import os
 | 
				
			||||||
 | 
					import shutil
 | 
				
			||||||
 | 
					import configparser
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from peewee import SqliteDatabase, PostgresqlDatabase
 | 
				
			||||||
 | 
					from rich.logging import RichHandler
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# first things first: logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					    format='%(message)s',
 | 
				
			||||||
 | 
					    level=logging.INFO,
 | 
				
			||||||
 | 
					    datefmt='%H:%M:%S', # add %Y-%m-%d if needed
 | 
				
			||||||
 | 
					    handlers=[RichHandler()]
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# load config file containing constants and secrets
 | 
				
			||||||
 | 
					main_config = configparser.ConfigParser()
 | 
				
			||||||
 | 
					main_config.read("/app/containerdata/config/news_fetch.config.ini")
 | 
				
			||||||
 | 
					db_config = configparser.ConfigParser()
 | 
				
			||||||
 | 
					db_config.read("/app/containerdata/config/db.config.ini")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# DEBUG MODE:
 | 
				
			||||||
 | 
					if os.getenv("DEBUG", "false") == "true":
 | 
				
			||||||
 | 
					    logger.warning("Found 'DEBUG=true', setting up dummy databases")
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    main_config["SLACK"]["archive_id"] = main_config["SLACK"]["debug_id"]
 | 
				
			||||||
 | 
					    main_config["MAIL"]["recipient"] = main_config["MAIL"]["sender"]
 | 
				
			||||||
 | 
					    main_config["DOWNLOADS"]["local_storage_path"] = main_config["DOWNLOADS"]["debug_storage_path"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    download_db = SqliteDatabase(
 | 
				
			||||||
 | 
					        main_config["DATABASE"]["download_db_debug"],
 | 
				
			||||||
 | 
					        pragmas = {'journal_mode': 'wal'} # mutliple threads can read at once
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PRODUCTION MODE:
 | 
				
			||||||
 | 
					else:
 | 
				
			||||||
 | 
					    logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...")
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    cred = db_config["DATABASE"]
 | 
				
			||||||
 | 
					    download_db = PostgresqlDatabase(
 | 
				
			||||||
 | 
					        cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    # TODO Reimplement backup/printout
 | 
				
			||||||
 | 
					    # logger.info("Backing up databases")
 | 
				
			||||||
 | 
					    # backup_dst = main_config["DATABASE"]["db_backup"]
 | 
				
			||||||
 | 
					    # today = datetime.today().strftime("%Y.%m.%d")
 | 
				
			||||||
 | 
					    # shutil.copyfile(
 | 
				
			||||||
 | 
					    #     os.path.join(db_base_path, main_config["DATABASE"]["chat_db_name"]), 
 | 
				
			||||||
 | 
					    #     os.path.join(backup_dst, today + "." + main_config["DATABASE"]["chat_db_name"]), 
 | 
				
			||||||
 | 
					    #     )
 | 
				
			||||||
 | 
					    # shutil.copyfile(
 | 
				
			||||||
 | 
					    #     os.path.join(db_base_path, main_config["DATABASE"]["download_db_name"]), 
 | 
				
			||||||
 | 
					    #     os.path.join(backup_dst, today + "." + main_config["DATABASE"]["download_db_name"]), 
 | 
				
			||||||
 | 
					    #     )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from utils_storage import models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Set up the database
 | 
				
			||||||
 | 
					models.set_db(download_db)
 | 
				
			||||||
@@ -3,7 +3,6 @@ import configuration
 | 
				
			|||||||
models = configuration.models
 | 
					models = configuration.models
 | 
				
			||||||
from threading import Thread
 | 
					from threading import Thread
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -14,10 +13,9 @@ from utils_worker.workers import CompressWorker, DownloadWorker, FetchWorker, Up
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class ArticleWatcher:
 | 
					class ArticleWatcher:
 | 
				
			||||||
    """Wrapper for a newly created article object. Notifies the coordinator upon change/completition"""
 | 
					    """Wrapper for a newly created article object. Notifies the coordinator upon change/completition"""
 | 
				
			||||||
    def __init__(self, article, thread, **kwargs) -> None:
 | 
					    def __init__(self, article, **kwargs) -> None:
 | 
				
			||||||
        self.article_id = article.id # in case article becomes None at any point, we can still track the article
 | 
					        self.article_id = article.id # in case article becomes None at any point, we can still track the article
 | 
				
			||||||
        self.article = article
 | 
					        self.article = article
 | 
				
			||||||
        self.thread = thread
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.completition_notifier = kwargs.get("notifier")
 | 
					        self.completition_notifier = kwargs.get("notifier")
 | 
				
			||||||
        self.fetch = kwargs.get("worker_fetch", None)
 | 
					        self.fetch = kwargs.get("worker_fetch", None)
 | 
				
			||||||
@@ -50,7 +48,7 @@ class ArticleWatcher:
 | 
				
			|||||||
        elif completed_action == "download":
 | 
					        elif completed_action == "download":
 | 
				
			||||||
            self.compress.process(self)
 | 
					            self.compress.process(self)
 | 
				
			||||||
        elif completed_action == "compress": # last step
 | 
					        elif completed_action == "compress": # last step
 | 
				
			||||||
            self.completition_notifier(self.article, self.thread)
 | 
					            self.completition_notifier(self.article)
 | 
				
			||||||
            # triggers action in Coordinator
 | 
					            # triggers action in Coordinator
 | 
				
			||||||
        elif completed_action == "upload":
 | 
					        elif completed_action == "upload":
 | 
				
			||||||
            # this case occurs when upload was faster than compression
 | 
					            # this case occurs when upload was faster than compression
 | 
				
			||||||
@@ -118,17 +116,34 @@ class Coordinator(Thread):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def launch(self) -> None:
 | 
					    def launch(self) -> None:
 | 
				
			||||||
        for w in [self.worker_download, self.worker_fetch, self.worker_upload, self.worker_compress]:
 | 
					        for w in [self.worker_download, self.worker_fetch, self.worker_upload, self.worker_compress]:
 | 
				
			||||||
            if not w is None:
 | 
					            if not w is None: # for reduced operations such as upload, some workers are set to None
 | 
				
			||||||
                w.start()
 | 
					                w.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # if past messages have not been sent, they must be reevaluated
 | 
				
			||||||
 | 
					        unsent = models.ArticleDownload.filter(sent = False)
 | 
				
			||||||
 | 
					        # .objects.filter(sent = False)
 | 
				
			||||||
 | 
					        for a in unsent:
 | 
				
			||||||
 | 
					            print(a)
 | 
				
			||||||
 | 
					            self.incoming_request(article=a)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def incoming_request(self, message):
 | 
					
 | 
				
			||||||
        """This method is passed onto the slack worker. It gets triggered when a new message is received."""
 | 
					    def incoming_request(self, message=None, article=None):
 | 
				
			||||||
 | 
					        """This method is passed onto the slack worker. It then is called when a new message is received."""
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        if message is not None:
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
                url = message.urls[0] # ignore all the other ones
 | 
					                url = message.urls[0] # ignore all the other ones
 | 
				
			||||||
 | 
					            except IndexError:
 | 
				
			||||||
 | 
					                return
 | 
				
			||||||
            article, is_new = models.ArticleDownload.get_or_create(article_url=url)
 | 
					            article, is_new = models.ArticleDownload.get_or_create(article_url=url)
 | 
				
			||||||
        thread = message.thread
 | 
					            article.slack_ts = message.ts # either update the timestamp (to the last reference to the article) or set it for the first time
 | 
				
			||||||
        thread.article = article
 | 
					        elif article is not None:
 | 
				
			||||||
        thread.save()
 | 
					            is_new = False
 | 
				
			||||||
 | 
					            logger.info(f"Received article {article} in incoming_request")
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            logger.error("Coordinator.incoming_request called with no arguments")
 | 
				
			||||||
 | 
					            return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.kwargs.update({"notifier" : self.article_complete_notifier})
 | 
					        self.kwargs.update({"notifier" : self.article_complete_notifier})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if is_new or (article.file_name == "" and article.verified == 0):
 | 
					        if is_new or (article.file_name == "" and article.verified == 0):
 | 
				
			||||||
@@ -136,7 +151,6 @@ class Coordinator(Thread):
 | 
				
			|||||||
            # this overwrites previously set information, but that should not be too important
 | 
					            # this overwrites previously set information, but that should not be too important
 | 
				
			||||||
            ArticleWatcher(
 | 
					            ArticleWatcher(
 | 
				
			||||||
                article,
 | 
					                article,
 | 
				
			||||||
                thread,
 | 
					 | 
				
			||||||
                **self.kwargs   
 | 
					                **self.kwargs   
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -146,7 +160,7 @@ class Coordinator(Thread):
 | 
				
			|||||||
            # the watcher will notify once it is sufficiently populated
 | 
					            # the watcher will notify once it is sufficiently populated
 | 
				
			||||||
        else: # manually trigger notification immediatly
 | 
					        else: # manually trigger notification immediatly
 | 
				
			||||||
            logger.info(f"Found existing article {article}. Now sending")
 | 
					            logger.info(f"Found existing article {article}. Now sending")
 | 
				
			||||||
            self.article_complete_notifier(article, thread)
 | 
					            self.article_complete_notifier(article)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -155,34 +169,33 @@ class Coordinator(Thread):
 | 
				
			|||||||
            w.start()
 | 
					            w.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for article in articles:
 | 
					        for article in articles:
 | 
				
			||||||
            notifier = lambda article: print(f"Completed manual actions for {article}")
 | 
					            notifier = lambda article: logger.info(f"Completed manual actions for {article}")
 | 
				
			||||||
            ArticleWatcher(article, None, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg 
 | 
					            ArticleWatcher(article, None, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def article_complete_notifier(self, article, thread):
 | 
					    def article_complete_notifier(self, article):
 | 
				
			||||||
        if self.worker_slack is None:
 | 
					        if self.worker_slack is None:
 | 
				
			||||||
            logger.warning("Not sending slack notifier")
 | 
					            logger.warning("Skipping slack notification because worker is None")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.worker_slack.bot_worker.respond_channel_message(thread)
 | 
					            self.worker_slack.bot_worker.respond_channel_message(article)
 | 
				
			||||||
        if self.worker_mail is None:
 | 
					        if self.worker_mail is None:
 | 
				
			||||||
            logger.warning("Not sending mail notifier")
 | 
					            logger.warning("Skipping mail notification because worker is None")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.worker_mail.send(article)
 | 
					            self.worker_mail.send(article)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					        article.sent = True
 | 
				
			||||||
 | 
					        article.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    coordinator = Coordinator()
 | 
					    coordinator = Coordinator()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if os.getenv("UPLOAD", "false") == "true":
 | 
					    if "upload" in sys.argv:
 | 
				
			||||||
        articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute()
 | 
					        articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute()
 | 
				
			||||||
        logger.info(f"Launching upload to archive for {len(articles)} articles.")
 | 
					        logger.info(f"Launching upload to archive for {len(articles)} articles.")
 | 
				
			||||||
        coordinator.manual_processing(articles, [UploadWorker()])
 | 
					        coordinator.manual_processing(articles, [UploadWorker()])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    elif os.getenv("CHECK", "false") == "true":
 | 
					 | 
				
			||||||
        from utils_check import runner as check_runner
 | 
					 | 
				
			||||||
        check_runner.verify_unchecked()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    else: # launch with full action
 | 
					    else: # launch with full action
 | 
				
			||||||
        slack_runner = slack_runner.BotRunner(coordinator.incoming_request)
 | 
					        slack_runner = slack_runner.BotRunner(coordinator.incoming_request)
 | 
				
			||||||
        kwargs = {
 | 
					        kwargs = {
 | 
				
			||||||
@@ -196,10 +209,10 @@ if __name__ == "__main__":
 | 
				
			|||||||
        try:
 | 
					        try:
 | 
				
			||||||
            coordinator.add_workers(**kwargs)
 | 
					            coordinator.add_workers(**kwargs)
 | 
				
			||||||
            coordinator.start()
 | 
					            coordinator.start()
 | 
				
			||||||
            slack_runner.start()
 | 
					            slack_runner.start() # last one to start, inside the main thread
 | 
				
			||||||
        except KeyboardInterrupt:
 | 
					        except KeyboardInterrupt:
 | 
				
			||||||
            logger.info("Keyboard interrupt. Stopping Slack and Coordinator")
 | 
					            logger.info("Keyboard interrupt. Stopping Slack and Coordinator")
 | 
				
			||||||
            slack_runner.stop()
 | 
					            slack_runner.stop()
 | 
				
			||||||
            print("BYE!")
 | 
					            logger.info("BYE!")
 | 
				
			||||||
            # coordinator was set as a daemon thread, so it will be stopped automatically
 | 
					            # coordinator was set as a daemon thread, so it will be stopped automatically
 | 
				
			||||||
            sys.exit(0)
 | 
					            sys.exit(0)
 | 
				
			||||||
@@ -23,7 +23,7 @@ u_options = {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bot_client = WebClient(
 | 
					bot_client = WebClient(
 | 
				
			||||||
    token = configuration.parsed["SLACK"]["auth_token"]
 | 
					    token = configuration.main_config["SLACK"]["auth_token"]
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -70,7 +70,7 @@ def send_reaction_to_slack_thread(article, reaction):
 | 
				
			|||||||
        else:
 | 
					        else:
 | 
				
			||||||
            ts = m.slack_ts
 | 
					            ts = m.slack_ts
 | 
				
			||||||
            bot_client.reactions_add(
 | 
					            bot_client.reactions_add(
 | 
				
			||||||
                channel=configuration.parsed["SLACK"]["archive_id"],
 | 
					                channel=configuration.main_config["SLACK"]["archive_id"],
 | 
				
			||||||
                name=reaction,
 | 
					                name=reaction,
 | 
				
			||||||
                timestamp=ts
 | 
					                timestamp=ts
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
@@ -7,7 +7,7 @@ import logging
 | 
				
			|||||||
import configuration
 | 
					import configuration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
config = configuration.parsed["MAIL"]
 | 
					config = configuration.main_config["MAIL"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def send(article_model):
 | 
					def send(article_model):
 | 
				
			||||||
    mail = MIMEMultipart()
 | 
					    mail = MIMEMultipart()
 | 
				
			||||||
							
								
								
									
										238
									
								
								news_fetch/utils_slack/runner.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								news_fetch/utils_slack/runner.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,238 @@
 | 
				
			|||||||
 | 
					from slack_bolt import App
 | 
				
			||||||
 | 
					from slack_bolt.adapter.socket_mode import SocketModeHandler
 | 
				
			||||||
 | 
					from slack_sdk.errors import SlackApiError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import configuration
 | 
				
			||||||
 | 
					config = configuration.main_config["SLACK"]
 | 
				
			||||||
 | 
					models = configuration.models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MessageIsUnwanted(Exception):
					    """Signals a message that should not be processed.

					    Triggered when the message is either threaded (a reply to another
					    message) or weird (like an edit, a deletion, etc).
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Message:
					    """A top-level Slack message, reduced to the fields the bot works with.

					    Attributes:
					        ts: The "ts" value of the Slack payload.
					        user_id: The "user" value of the payload, or "BAD USER" if absent.
					        text: The "text" value of the payload (links are wrapped in <...>).
					    """

					    # Annotations only: the original assigned the `str` type object itself
					    # as a class-level value, which was never a meaningful default.
					    ts: str
					    user_id: str
					    text: str
					    logger = logging.getLogger(__name__)

					    def __init__(self, message_dict):
					        """Build the message from a Slack payload dict.

					        Raises:
					            MessageIsUnwanted: if the payload is an edit (subtype
					                "message_changed"), is not of type "message", or is a
					                reply inside a thread.
					            KeyError: if "type", "ts" or "text" is missing from the payload.
					        """
					        if message_dict.get("subtype", "not bad") == "message_changed":
					            raise MessageIsUnwanted()

					        if message_dict["type"] != "message":
					            self.logger.warning(f"What should I do of {message_dict}")
					            raise MessageIsUnwanted()

					        # A thread_ts that differs from ts means it's a reply to another message.
					        if "thread_ts" in message_dict and message_dict["thread_ts"] != message_dict["ts"]:
					            raise MessageIsUnwanted()

					        self.user_id = message_dict.get("user", "BAD USER")
					        self.ts = message_dict["ts"]
					        self.text = message_dict["text"]

					    def __str__(self) -> str:
					        return f"MSG [{self.text}]"

					    @property
					    def urls(self):
					        """Return the link targets found in the text, in order of appearance.

					        Every <...> group that contains a "." is considered a link (it must
					        contain a tld). Slack abbreviates urls as <url|link preview>; since
					        "|" is discouraged in urls, only the part before the first "|" is kept.
					        """
					        bracketed = re.findall(r"<(.*?)>", self.text)
					        return [group.split("|")[0] for group in bracketed if "." in group]

					    @property
					    def is_by_human(self):
					        """Return True unless the message was posted by the configured bot id."""
					        # Fixed: this used to read `self.user.user_id`, but no `user`
					        # attribute is ever set, so every access raised AttributeError.
					        return self.user_id != config["bot_id"]

					    @property
					    def has_single_url(self):
					        """Return True if the text contains exactly one url."""
					        return len(self.urls) == 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class BotApp(App):
					    """Slack app that hands archive-channel messages to a callback and posts replies.

					    Messages are turned into `Message` objects and passed to `callback`;
					    results for an article are posted back via `respond_channel_message`.
					    """

					    logger = logging.getLogger(__name__)

					    def __init__(self, callback, *args, **kwargs):
					        """Keep `callback`; all other arguments are forwarded to `App`.

					        `callback` is later invoked as `callback(message=<Message>)`.
					        """
					        super().__init__(*args, **kwargs)
					        self.callback = callback

					    def pre_start(self):
					        """Handle the messages missed while offline, then log the db status."""
					        # Plain loop: the calls are made for their side effects only.
					        for missed in self.fetch_missed_channel_messages():
					            self.handle_incoming_message(missed)
					        self.startup_status()

					    def say_substitute(self, *args, **kwargs):
					        """Post a message to the archive channel.

					        The positional arguments are joined with " - " to form the text;
					        keyword arguments (e.g. `thread_ts`) go to `chat_postMessage`.
					        """
					        self.client.chat_postMessage(
					            channel=config["archive_id"],
					            text=" - ".join(args),
					            **kwargs
					        )

					    def _parse_wanted_messages(self, raw_messages):
					        """Convert payload dicts to `Message` objects, dropping unwanted ones."""
					        wanted = []
					        for raw in raw_messages:
					            try:
					                wanted.append(Message(raw))
					            except MessageIsUnwanted:
					                # Edits and thread replies are expected in the history; a
					                # single one must not abort the whole catch-up.
					                continue
					        return wanted

					    def fetch_missed_channel_messages(self):
					        """Return the archive-channel messages newer than the last stored one.

					        The starting point is the highest `slack_ts` among the stored
					        articles, or 0 when there are none. Further pages are requested
					        while the response reports `has_more`; a `SlackApiError` on such a
					        request is retried after `config["api_wait_time"]` seconds.
					        """
					        # latest processed message_ts is:
					        presaved = models.ArticleDownload.select().order_by(models.ArticleDownload.slack_ts.desc()).get_or_none()
					        last_ts = 0 if presaved is None else presaved.slack_ts_full

					        result = self.client.conversations_history(
					            channel=config["archive_id"],
					            oldest=last_ts
					        )
					        return_messages = self._parse_wanted_messages(result.get("messages", []))

					        refetch = result.get("has_more", False)
					        while refetch: # we have not actually fetched them all
					            try:
					                # next page, continuing from the cursor of the previous response
					                result = self.client.conversations_history(
					                    channel = config["archive_id"],
					                    cursor = result["response_metadata"]["next_cursor"],
					                    oldest = last_ts
					                )
					            except SlackApiError: # Most likely a rate-limit
					                self.logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"]))
					                time.sleep(config["api_wait_time"])
					                # `result` still holds the previous page, so the same cursor is retried
					                continue

					            refetch = result.get("has_more", False)
					            return_messages.extend(self._parse_wanted_messages(result.get("messages", [])))

					        self.logger.info(f"Fetched {len(return_messages)} new channel messages.")
					        return return_messages

					    def handle_incoming_message(self, message, say=None):
					        """Reacts to all messages inside channel archiving.

					        This either gets called when catching up on missed messages (by
					        pre_start()) or by the SocketModeHandler in 'live' mode.

					        Args:
					            message: a `Message`, or a raw payload dict that is converted.
					            say: accepted for the listener signature; not used here.

					        Returns:
					            False if a payload dict turned out to be unwanted, else None.
					        """
					        if isinstance(message, dict):
					            try:
					                message = Message(message)
					            except MessageIsUnwanted:
					                return False

					        self.logger.info(f"Handling message {message} ({len(message.urls)} urls)")

					        if len(message.urls) > 1:
					            # Fixed: this used `message.thread.slack_ts`, but `Message` has no
					            # `thread` attribute; the message's own ts is used as thread_ts.
					            self.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.ts)

					        self.callback(message = message)

					    def respond_channel_message(self, article, say=None):
					        """Post every entry of `article.slack_info` to the article's thread.

					        Entries with a truthy "file_path" are uploaded with "reply_text" as
					        the initial comment; a failed upload is reported in the thread and
					        logged. Entries without a file are posted as plain text.

					        Args:
					            article: object providing `slack_info` and `slack_ts_full`.
					            say: callable used for text replies; defaults to `say_substitute`.
					        """
					        if say is None:
					            say = self.say_substitute

					        for a in article.slack_info:
					            if a["file_path"]:
					                try:
					                    self.client.files_upload(
					                        channels = config["archive_id"],
					                        initial_comment = f"{a['reply_text']}",
					                        file = a["file_path"],
					                        thread_ts = article.slack_ts_full
					                    )
					                except SlackApiError as e: # upload resulted in an error
					                    say(
					                        "File {} could not be uploaded.".format(a),
					                        thread_ts = article.slack_ts_full
					                    )
					                    self.logger.error(f"File upload failed: {e}")
					            else: # anticipated that there is no file!
					                say(
					                    f"{a['reply_text']}",
					                    thread_ts = article.slack_ts_full
					                )

					    def startup_status(self):
					        """Prints an overview of the articles.

					        This needs to be called here because it should run after having
					        fetched the newly sent messages.
					        """
					        total = models.ArticleDownload.select().count()
					        to_be_processed = models.ArticleDownload.select().where(models.ArticleDownload.title == "").count()
					        unchecked = models.ArticleDownload.select().where(models.ArticleDownload.verified == 0).count()
					        bad = models.ArticleDownload.select().where(models.ArticleDownload.verified == -1).count()
					        not_uploaded = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").count()
					        self.logger.info(
					            f"[bold]NEWS-FETCH DATABASE STATUS[/bold]: Total entries: {total}; Not yet downloaded: {to_be_processed}; Not yet checked: {unchecked}; Not yet uploaded to archive: {not_uploaded}; Marked as bad: {bad}",
					            extra={"markup": True}
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class BotRunner():
					    """Stupid encapsulation so that we can apply the slack decorators to the BotApp"""

					    logger = logging.getLogger(__name__)

					    def __init__(self, callback, *args, **kwargs) -> None:
					        """Create the `BotApp`, register its listeners and the socket handler.

					        Args:
					            callback: forwarded to `BotApp`, which calls it for each message.
					            *args, **kwargs: accepted but not used.
					        """
					        self.bot_worker = BotApp(callback, token=config["auth_token"])

					        @self.bot_worker.event(event="message", matchers=[is_message_in_archiving])
					        def handle_incoming_message(message, say):
					            return self.bot_worker.handle_incoming_message(message, say)

					        # NOTE(review): "event" is presumably meant as a catch-all for every
					        # other event type - confirm that slack_bolt actually matches it.
					        @self.bot_worker.event(event="event")
					        def handle_all_other_reactions(event, say):
					            # Fixed: `Logger.log` needs a level as its first argument; the
					            # previous call passed only the text and would raise TypeError.
					            self.logger.debug("Ignoring slack event that isn't a message")

					        self.handler = SocketModeHandler(self.bot_worker, config["app_token"])

					    def start(self):
					        """Run the app's catch-up (`pre_start`), then start the socket handler."""
					        self.bot_worker.pre_start()
					        self.handler.start()

					    def stop(self):
					        """Close the socket handler and log that it was closed."""
					        self.handler.close()
					        self.logger.info("Closed Slack-Socketmodehandler")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def is_message_in_archiving(message) -> bool:
					    """Tell whether `message` comes from the configured archive channel.

					    Compares the payload's "channel" entry with `config["archive_id"]`.
					    """
					    origin_channel = message["channel"]
					    return origin_channel == config["archive_id"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										10
									
								
								news_fetch/utils_storage/helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								news_fetch/utils_storage/helpers.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,10 @@
 | 
				
			|||||||
 | 
					def clear_path_name(path):
					    """Return `path` reduced to characters that are safe in a file name.

					    Alphanumeric characters, spaces, '.', '_' and '-' are kept; every other
					    character (path separators included) becomes '_'. Trailing whitespace
					    is stripped from the result.
					    """
					    allowed_extras = frozenset((' ', '.', '_', '-'))
					    cleaned = []
					    for char in path:
					        if char.isalnum() or char in allowed_extras:
					            cleaned.append(char)
					        else:
					            cleaned.append("_")
					    return "".join(cleaned).rstrip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def shorten_name(name, offset = 50):
					    """Cut `name` to its first `offset` characters, marking the cut with "...".

					    Names of at most `offset` characters are returned unchanged.
					    """
					    if len(name) <= offset:
					        return name
					    return name[:offset] + "..."
 | 
				
			||||||
							
								
								
									
										297
									
								
								news_fetch/utils_storage/models.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								news_fetch/utils_storage/models.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,297 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from peewee import *
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import markdown
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					import configuration
 | 
				
			||||||
 | 
					import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from . import helpers
 | 
				
			||||||
 | 
					config = configuration.main_config["DOWNLOADS"]
 | 
				
			||||||
 | 
					slack_config = configuration.main_config["SLACK"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# set the nature of the db at runtime
 | 
				
			||||||
 | 
					download_db = DatabaseProxy()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DownloadBaseModel(Model):
					    """Common base for the models stored in the download database."""

					    class Meta:
					        # `download_db` is a proxy; the nature of the db is set at runtime
					        database = download_db
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## == Article related models == ##
 | 
				
			||||||
 | 
					class ArticleDownload(DownloadBaseModel):
 | 
				
			||||||
 | 
					    # in the beginning this is all we have
 | 
				
			||||||
 | 
					    article_url = TextField(default = '', unique=True)
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    # fetch then fills in the metadata
 | 
				
			||||||
 | 
					    title = CharField(default='')
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def is_title_bad(self):  # add incrementally
 | 
				
			||||||
 | 
					        return "PUR-Abo" in self.title \
 | 
				
			||||||
 | 
					            or "Redirecting" in self.title \
 | 
				
			||||||
 | 
					            or "Error while running fetch" in self.title
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    summary = TextField(default = '')
 | 
				
			||||||
 | 
					    source_name = CharField(default = '')
 | 
				
			||||||
 | 
					    language = CharField(default = '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    file_name = TextField(default = '')
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def save_path(self):
 | 
				
			||||||
 | 
					        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def fname_nas(self, file_name=""):
 | 
				
			||||||
 | 
					        if self.download_date:
 | 
				
			||||||
 | 
					            if file_name:
 | 
				
			||||||
 | 
					                return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{file_name}"
 | 
				
			||||||
 | 
					            else: # return the self. name
 | 
				
			||||||
 | 
					                return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{self.file_name}"
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return None
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def fname_template(self):
 | 
				
			||||||
 | 
					        if "youtube.com" in self.source_name or "youtu.be" in self.source_name:
 | 
				
			||||||
 | 
					            fname = f"{self.source_name} -- {self.title}"
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            fname = f"{self.source_name} -- {self.title}.pdf"
 | 
				
			||||||
 | 
					        return helpers.clear_path_name(fname)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    archive_url = TextField(default = '')
 | 
				
			||||||
 | 
					    pub_date = DateField(default = '')
 | 
				
			||||||
 | 
					    download_date = DateField(default = datetime.date.today)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def slack_ts_full(self):
 | 
				
			||||||
 | 
					        str_ts = str(self.slack_ts)
 | 
				
			||||||
 | 
					        cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there a 6 decimals
 | 
				
			||||||
 | 
					        return f"{str_ts}{cut_zeros * '0'}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sent = BooleanField(default = False)
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    archived_by = CharField(default = os.getenv("UNAME"))
 | 
				
			||||||
 | 
					    # need to know who saved the message because the file needs to be on their computer in order to get verified
 | 
				
			||||||
 | 
					    # verification happens in a different app, but the model has the fields here as well
 | 
				
			||||||
 | 
					    comment = TextField(default = '')
 | 
				
			||||||
 | 
					    verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # authors
 | 
				
			||||||
 | 
					    # keywords
 | 
				
			||||||
 | 
					    # ... are added through foreignkeys
 | 
				
			||||||
 | 
					    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ## Helpers specific to a single article
 | 
				
			||||||
 | 
					    def __str__(self) -> str:
 | 
				
			||||||
 | 
					        if self.title != '' and self.source_name != '':
 | 
				
			||||||
 | 
					            desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}"
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            desc = f"{self.article_url}"
 | 
				
			||||||
 | 
					        return f"ART [{desc}]"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
					    def slack_info(self):
					        """Build the list of slack replies that describe this article.

					        Returns:
					            A list of dicts with the keys "reply_text" and "file_path".
					            If file_status() reports a problem the list only holds its error message.
					            Otherwise the first entry is the main reply (title, verification status,
					            quoted summary, list of related files) and every related pdf adds one
					            entry with an empty text and the path of the file to upload.
					        """
					        # Check the file first, so that a failed download is reported even if
					        # the remaining fields (e.g. the summary) were never filled in.
					        file_ok, error_msg = self.file_status()
					        if not file_ok:
					            return [error_msg]

					        # verified is -1, 0 or 1; shifting it by one turns it into a list index
					        status = [":x: No better version available", ":gear: Verification pending", ":white_check_mark: Verified by human"][self.verified + 1]
					        content = "\n>" + "\n>".join(self.summary.split("\n"))
					        reply_text = f"*{self.title}*\n{status}\n{content}"

					        # first the base file
					        if self.file_name.endswith(".pdf"):
					            file_path = self.save_path + self.file_name
					        else:  # not a pdf: don't upload, only point to the copy on the NAS
					            # fname_nas is a method and has to be called, otherwise the reply shows the repr of the bound method
					            location = f"Not uploaded to slack, but the file will be on the NAS:\n`{self.fname_nas(self.file_name)}`"
					            reply_text = f"{reply_text}\n{location}"
					            file_path = None
					        answer = [{"reply_text": reply_text, "file_path": file_path}]

					        # then the related files
					        related_entries = []
					        for r in self.related:
					            fname = r.related_file_name
					            entry = "\n• `{}` ".format(self.fname_nas(fname))
					            if fname.endswith(".pdf"):  # this is a manageable file, directly upload
					                answer.append({"reply_text": "", "file_path": self.save_path + fname})
					            else:  # not pdf <=> too large. Don't upload but mention its existence
					                entry += "(not uploaded to slack, but the file will be on the NAS)"
					            related_entries.append(entry)

					        if related_entries:
					            answer[0]["reply_text"] += "\nRelated files:\n" + "".join(related_entries)

					        return answer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
					    def mail_info(self):
					        """Return the slack replies, preceded by a link to the article, with every reply text rendered through markdown.markdown."""
					        link_entry = {"reply_text": f"[{self.article_url}]({self.article_url})\n", "file_path":None}
					        rendered = []
					        for entry in [link_entry] + self.slack_info:
					            # only the text is converted, the attachment path is passed on untouched
					            rendered.append({"reply_text": markdown.markdown(entry["reply_text"]), "file_path": entry["file_path"]})
					        return rendered
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def set_authors(self, authors):
					        """Create one ArticleAuthor row per entry of authors, each linked to this article."""
					        for author_name in authors:
					            ArticleAuthor.create(article = self, author = author_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def set_related(self, related):
					        """Create one ArticleRelated row per file name in related, each linked to this article."""
					        for related_name in related:
					            ArticleRelated.create(article = self, related_file_name = related_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def file_status(self):
					        """Check that the main file of this article was saved and can still be found.

					        Returns:
					            A tuple (ok, message). ok is True and message is an empty dict when the
					            file exists. Otherwise ok is False and message is a reply dict
					            ("reply_text", "file_path") explaining what went wrong.
					        """
					        if self.file_name:
					            if os.path.exists(self.save_path + self.file_name):
					                return True, {}
					            # a name was recorded, but nothing is stored under it
					            logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
					            return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None}

					        logger.error(f"Article {self} has no filename!")
					        return False, {"reply_text": "Download failed, no file was saved.", "file_path": None}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ArticleAuthor(DownloadBaseModel):
					    """One author of an article; reachable from the article through the 'authors' backref."""
					    article = ForeignKeyField(ArticleDownload, backref="authors")
					    author = CharField()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ArticleRelated(DownloadBaseModel):
					    """A file related to an article (full text of a paper, audio files, etc.); reachable from the article through the 'related' backref."""
					    article = ForeignKeyField(ArticleDownload, backref="related")
					    related_file_name = TextField(default="")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# class Thread(ChatBaseModel):
 | 
				
			||||||
 | 
					#     """The threads that concern us are only created if the base message contains a url"""
 | 
				
			||||||
 | 
					#     thread_ts = FloatField(default = 0)
 | 
				
			||||||
 | 
					#     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None)
 | 
				
			||||||
 | 
					#     # provides, ts, user, models
 | 
				
			||||||
 | 
					#     # messages
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def slack_ts(self):
 | 
				
			||||||
 | 
					#         str_ts = str(self.thread_ts)
 | 
				
			||||||
 | 
					#         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, that is a problem!
 | 
				
			||||||
 | 
					#         return "{}{}".format(str_ts, cut_zeros*"0")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def initiator_message(self):
 | 
				
			||||||
 | 
					#         try:
 | 
				
			||||||
 | 
					#             return self.messages[0] # TODO check if this needs sorting
 | 
				
			||||||
 | 
					#         except IndexError:
 | 
				
			||||||
 | 
					#             logger.warning(f"Thread {self} is empty. How can that be?")
 | 
				
			||||||
 | 
					#             return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def message_count(self):
 | 
				
			||||||
 | 
					#         # logger.warning("message_count was called")
 | 
				
			||||||
 | 
					#         return self.messages.count()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def last_message(self):
 | 
				
			||||||
 | 
					#         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation
 | 
				
			||||||
 | 
					#         return messages[-1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def is_fully_processed(self) -> bool:
 | 
				
			||||||
 | 
					#         init_message = self.initiator_message
 | 
				
			||||||
 | 
					#         if init_message is None:
 | 
				
			||||||
 | 
					#             return False
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					#         if init_message.is_processed_override:
 | 
				
			||||||
 | 
					#             return True
 | 
				
			||||||
 | 
					#         # this override is set for instance, when no url was sent at all. Then set this thread to be ignored
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					#         reactions = init_message.reaction
 | 
				
			||||||
 | 
					#         if not reactions:
 | 
				
			||||||
 | 
					#             return False
 | 
				
			||||||
 | 
					#         else:
 | 
				
			||||||
 | 
					#             r = reactions[0].type # can and should only have one reaction
 | 
				
			||||||
 | 
					#             return r == "white_check_mark" \
 | 
				
			||||||
 | 
					#                 or r == "x"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					# class Message(ChatBaseModel):
 | 
				
			||||||
 | 
					#     ts = FloatField(unique=True) #for sorting
 | 
				
			||||||
 | 
					#     channel_id = CharField(default='')
 | 
				
			||||||
 | 
					#     user = ForeignKeyField(User, backref="messages")
 | 
				
			||||||
 | 
					#     text = TextField(default='')
 | 
				
			||||||
 | 
					#     thread = ForeignKeyField(Thread, backref="messages", default=None)
 | 
				
			||||||
 | 
					#     file_type = CharField(default='')
 | 
				
			||||||
 | 
					#     perma_link = CharField(default='')
 | 
				
			||||||
 | 
					#     is_processed_override = BooleanField(default=False)
 | 
				
			||||||
 | 
					#     # reaction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     def __str__(self) -> str:
 | 
				
			||||||
 | 
					#         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def slack_ts(self):
 | 
				
			||||||
 | 
					#         str_ts = str(self.ts)
 | 
				
			||||||
 | 
					#         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, that is a problem!
 | 
				
			||||||
 | 
					#         return "{}{}".format(str_ts, cut_zeros * "0")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def urls(self):
 | 
				
			||||||
 | 
					#         pattern = r"<(.*?)>"
 | 
				
			||||||
 | 
					#         matches = re.findall(pattern, self.text)
 | 
				
			||||||
 | 
					#         matches = [m for m in matches if "." in m]
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					#         new_matches = []
 | 
				
			||||||
 | 
					#         for m in matches:
 | 
				
			||||||
 | 
					#             if "." in m:  # must contain a tld, right?
 | 
				
			||||||
 | 
					#                 # further complication: slack automatically abbreviates urls in the format: 
 | 
				
			||||||
 | 
					#                 # <url|link preview>. Lucky for us, "|" is a character that is discouraged in urls, meaning we can "safely" split on it and retain the first half
 | 
				
			||||||
 | 
					#                 if "|" in m:
 | 
				
			||||||
 | 
					#                     keep = m.split("|")[0]
 | 
				
			||||||
 | 
					#                 else:
 | 
				
			||||||
 | 
					#                     keep = m
 | 
				
			||||||
 | 
					#                 new_matches.append(keep)
 | 
				
			||||||
 | 
					#         return new_matches
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def is_by_human(self):
 | 
				
			||||||
 | 
					#         return self.user.user_id != slack_config["bot_id"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					#     @property
 | 
				
			||||||
 | 
					#     def has_single_url(self):
 | 
				
			||||||
 | 
					#         return len(self.urls) == 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_db(download_db_object):
					    """Initialize download_db with the given database object and create the article tables.

					    NOTE(review): download_db is presumably a peewee DatabaseProxy declared further up in this file - confirm.
					    """
					    download_db.initialize(download_db_object)
					    tables = [ArticleDownload, ArticleAuthor, ArticleRelated]
					    # create_tables does nothing for tables that exist already
					    with download_db:
					        download_db.create_tables(tables)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -5,7 +5,7 @@ from pathlib import Path
 | 
				
			|||||||
import logging
 | 
					import logging
 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
import configuration
 | 
					import configuration
 | 
				
			||||||
config = configuration.parsed["DOWNLOADS"]
 | 
					config = configuration.main_config["DOWNLOADS"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
shrink_sizes = []
 | 
					shrink_sizes = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -8,7 +8,7 @@ from selenium import webdriver
 | 
				
			|||||||
import configuration
 | 
					import configuration
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config = configuration.parsed["DOWNLOADS"]
 | 
					config = configuration.main_config["DOWNLOADS"]
 | 
				
			||||||
blacklisted = json.loads(config["blacklisted_href_domains"])
 | 
					blacklisted = json.loads(config["blacklisted_href_domains"])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -25,10 +25,10 @@ class PDFDownloader:
 | 
				
			|||||||
        options.profile = config["browser_profile_path"]
 | 
					        options.profile = config["browser_profile_path"]
 | 
				
			||||||
        # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
 | 
					        # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if os.getenv("HEADLESS", "false") == "true":
 | 
					        if os.getenv("DEBUG", "false") == "true":
 | 
				
			||||||
            options.add_argument('--headless')
 | 
					            self.logger.warning("Opening browser GUI because of 'DEBUG=true'")
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.logger.warning("Opening browser GUI because of 'HEADLESS=false'")
 | 
					            options.add_argument('--headless')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        options.set_preference('print.save_as_pdf.links.enabled', True)
 | 
					        options.set_preference('print.save_as_pdf.links.enabled', True)
 | 
				
			||||||
        # Just save if the filetype is pdf already
 | 
					        # Just save if the filetype is pdf already
 | 
				
			||||||
@@ -92,7 +92,7 @@ class PDFDownloader:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        # in the mean time, get a page title if required
 | 
					        # in the mean time, get a page title if required
 | 
				
			||||||
        if article_object.is_title_bad:
 | 
					        if article_object.is_title_bad:
 | 
				
			||||||
            article_object.title = self.driver.title.replace(".pdf", "")
 | 
					            article_object.title = self.driver.title.replace(".pdf", "") # some titles end with .pdf
 | 
				
			||||||
            # will be propagated to the saved file (dst) as well
 | 
					            # will be propagated to the saved file (dst) as well
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fname = article_object.fname_template
 | 
					        fname = article_object.fname_template
 | 
				
			||||||
@@ -112,7 +112,6 @@ class PDFDownloader:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if success:
 | 
					        if success:
 | 
				
			||||||
            article_object.file_name = fname
 | 
					            article_object.file_name = fname
 | 
				
			||||||
            article_object.set_references(self.get_references())
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            article_object.file_name = ""
 | 
					            article_object.file_name = ""
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
@@ -150,18 +149,6 @@ class PDFDownloader:
 | 
				
			|||||||
            return False
 | 
					            return False
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_references(self):
        """Collect the link targets of the page currently loaded in the driver.

        Returns:
            The href values of all <a> elements, without empty values and without
            those that contain one of the blacklisted domains. An empty list if the
            page can not be queried.
        """
        try:
            # find_elements_by_xpath no longer exists in recent Selenium 4 releases;
            # find_elements(by, value) is available in Selenium 3 and 4 ("xpath" == By.XPATH)
            anchors = self.driver.find_elements("xpath", "//a[@href]")
            hrefs = [e.get_attribute("href") for e in anchors]
        except Exception:  # best effort: the references are optional, never fail the download because of them
            hrefs = []
        # filter a tiny bit at least; skip missing hrefs, they would break the substring test
        return [h for h in hrefs if h and not any(domain in h for domain in blacklisted)]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -54,9 +54,4 @@ def get_description(article_object):
 | 
				
			|||||||
    except AttributeError:
 | 
					    except AttributeError:
 | 
				
			||||||
        pass # list would have been empty anyway
 | 
					        pass # list would have been empty anyway
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        article_object.set_keywords(news_article.keywords)
 | 
					 | 
				
			||||||
    except AttributeError:
 | 
					 | 
				
			||||||
        pass # list would have been empty anyway
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return article_object
 | 
					    return article_object
 | 
				
			||||||
		Reference in New Issue
	
	Block a user