FS updates and corrections

This commit is contained in:
Remy Moll 2022-06-15 11:14:08 +02:00
parent 54760abee4
commit 87d65fc988
14 changed files with 91 additions and 56 deletions

4
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,4 @@
{
"python.linting.flake8Enabled": true,
"python.linting.enabled": false
}

View File

@ -2,7 +2,6 @@ FROM python:latest
ENV TZ Euopre/Zurich
RUN echo "deb http://deb.debian.org/debian/ unstable main contrib non-free" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y \
evince \

View File

@ -49,9 +49,15 @@ I also wrote a rudimentary docker compose file which makes running much simpler.
All relevant passthroughs and mounts are specified through the env-file, for which I configured 4 versions: production, debug (development in general), upload and check. These files will have to be adapted to your individual setup but can be reused more easily.
> Note:
For the debug env-file, you will likely want interactivity, so you need to run:
`docker compose --env-file env/debug run auto_news`
<!-- > Note:
>
> The `debug` requires additional input. Once `docker compose up` is running, in a new session run `docker compose --env-file env/debug exec bash`. The live-mounted code is then under `/code`. Note that the `DEBUG=true` environment variable is still set. If you want to test things on production, run `export DEBUG=false`.
-->
## Building

View File

@ -8,7 +8,7 @@ from rich.logging import RichHandler
logging.basicConfig(
format='%(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S',
datefmt='%H:%M:%S', # add %Y-%m-%d if needed
handlers=[RichHandler()]
)
logger = logging.getLogger(__name__)

View File

@ -158,10 +158,11 @@ def verify_unchecked():
try:
# close any previously opened windows:
subprocess.call("killall evince")
subprocess.call(["kill", "`pgrep evince`"])
# then open a new one
subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# suppress evince gtk warnings
print("done")
except Exception as e:
print(e)
continue

View File

@ -207,7 +207,11 @@ class Thread(ChatBaseModel):
@property
def initiator_message(self):
try:
return self.messages[0] # TODO check if this needs sorting
except IndexError:
logger.warning(f"Thread {self} is empty. How can that be?")
return None
@property
def message_count(self):
@ -222,6 +226,9 @@ class Thread(ChatBaseModel):
@property
def is_fully_processed(self) -> bool:
init_message = self.initiator_message
if init_message is None:
return False
if init_message.is_processed_override:
return True
# this override is set for instance, when no url was sent at all. Then set this thread to be ignored

View File

@ -5,13 +5,13 @@ import os
import base64
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import configuration
import json
config = configuration.parsed["DOWNLOADS"]
blacklisted = json.loads(config["blacklisted_href_domains"])
class PDFDownloader:
"""Saves a given url. Fills the object it got as a parameter"""
logger = logging.getLogger(__name__)
@ -19,10 +19,8 @@ class PDFDownloader:
running = False
def start(self):
try:
self.finish()
except:
self.logger.info("gecko driver not yet running")
self.finish() # clear up
options = webdriver.FirefoxOptions()
options.profile = config["browser_profile_path"]
# should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
@ -59,10 +57,12 @@ class PDFDownloader:
self.start() # relaunch the dl util
def finish(self):
if self.running:
self.logger.info("Exiting gecko driver")
self.driver.quit()
self.running = False
else:
self.logger.info("Gecko driver not yet running")
def download(self, article_object):
sleep_time = 1

View File

@ -5,6 +5,7 @@ version: "3.9"
services:
auto_news:
build: .
image: auto_news:latest
volumes:
- ${CONTAINER_DATA}:/app/file_storage
- ${HOSTS_FILE}:/etc/hosts
@ -14,13 +15,17 @@ services:
network_mode: host
environment:
- DISPLAY=$DISPLAY
- TERM=xterm-256color # colored logs
- COLUMNS=160 # for wider logs
- DEBUG=${DEBUG}
- CHECK=${CHECK}
- UPLOAD=${UPLOAD}
- HEADLESS=${HEADLESS}
- REDUCEDFETCH=${REDUCEDFETCH}
entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile
stdin_open: ${INTERACTIVE:-false} # docker run -i
tty: ${INTERACTIVE:-false} # docker run -t
entrypoint: ${ENTRYPOINT:-"python3 runner.py"} # by default launch workers as defined in the Dockerfile
# geckodriver:
# image: selenium/standalone-firefox:100.0

4
env/check vendored
View File

@ -1,7 +1,7 @@
# Does not run any downloads but displays the previously downloaded but not yet checked files. Requires display-access via xauth
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
XAUTHORTIY=$XAUTHORTIY

7
env/debug vendored
View File

@ -1,7 +1,7 @@
# Runs in a debugging mode, does not launch anything at all but starts a bash process
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
CODE=./
XAUTHORTIY=$XAUTHORTIY
@ -12,4 +12,5 @@ UPLOAD=false
HEADLESS=false
REDUCEDFETCH=false
ENTRYPOINT="sleep infinity"
ENTRYPOINT="/bin/bash"
INTERACTIVE=true

4
env/production vendored
View File

@ -1,7 +1,7 @@
# Runs on the main slack channel with the full worker setup. If nothing funky has occurred, reducedfetch is a speedup
CONTAINER_DATA=/mnt/Data/Downloads/auto_news.container
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
DEBUG=false
CHECK=false

4
env/upload vendored
View File

@ -1,7 +1,7 @@
# Does not run any other workers and only uploads to archive the urls that weren't previously uploaded
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
DEBUG=false

View File

@ -1,5 +1,3 @@
from cmath import log
from concurrent.futures import thread
import sys
sys.path.append("../app")
import runner
@ -8,14 +6,15 @@ logger = logging.getLogger()
import json
logger.info("Overwriting production values for single use media-fetch")
logger.info("Overwriting production values for single time media-fetch")
runner.configuration.models.set_db(
runner.configuration.SqliteDatabase("media_message_dummy.db"), # chat_db (not needed here)
runner.configuration.SqliteDatabase("media_downloads.db")
runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"), # chat_db (not needed here)
runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
)
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "."
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "../.dev/"
def fetch():
coordinator = runner.Coordinator()
@ -38,3 +37,16 @@ for u in url_list:
dummy_thread = runner.models.Thread()
msg = runner.models.Message(text= msg_text, thread=dummy_thread)
coordinator.incoming_request(msg)
def show():
sel = runner.models.ArticleDownload.select()
entries = ["title"] #, "article_url", "archive_url"]
for e in entries:
r = [t.title for t in sel]
print(r)
# print([t for t in r])
show()