FS updates and corrections
This commit is contained in:
parent
54760abee4
commit
87d65fc988
4
.vscode/settings.json
vendored
Normal file
4
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"python.linting.flake8Enabled": true,
|
||||
"python.linting.enabled": false
|
||||
}
|
@ -2,7 +2,6 @@ FROM python:latest
|
||||
|
||||
ENV TZ Europe/Zurich
|
||||
|
||||
|
||||
RUN echo "deb http://deb.debian.org/debian/ unstable main contrib non-free" >> /etc/apt/sources.list
|
||||
RUN apt-get update && apt-get install -y \
|
||||
evince \
|
||||
|
@ -49,9 +49,15 @@ I also wrote a rudimentary docker compose file which makes running much more sim
|
||||
|
||||
All relevant passthroughs and mounts are specified through the env-file, for which I configured 4 versions: production, debug (development in general), upload and check. These files will have to be adapted to your individual setup but can be reused more easily.
|
||||
|
||||
> Note:
|
||||
For the debug env-file, you will likely want interactivity, so you need to run:
|
||||
|
||||
`docker compose --env-file env/debug run auto_news`
|
||||
|
||||
<!-- > Note:
|
||||
>
|
||||
> The `debug` requires additional input. Once `docker compose up` is running, in a new session run `docker compose --env-file env/debug exec bash`. The live-mounted code is then under `/code`. Note that the `DEBUG=true` environment variable is still set. If you want to test things on production, run `export DEBUG=false`.
|
||||
-->
|
||||
|
||||
|
||||
|
||||
## Building
|
||||
|
@ -8,7 +8,7 @@ from rich.logging import RichHandler
|
||||
logging.basicConfig(
|
||||
format='%(message)s',
|
||||
level=logging.INFO,
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
datefmt='%H:%M:%S', # add %Y-%m-%d if needed
|
||||
handlers=[RichHandler()]
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -158,10 +158,11 @@ def verify_unchecked():
|
||||
|
||||
try:
|
||||
# close any previously opened windows:
|
||||
subprocess.call("killall evince")
|
||||
subprocess.call(["kill", "`pgrep evince`"])
|
||||
# then open a new one
|
||||
subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
# suppress evince gtk warnings
|
||||
print("done")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
continue
|
||||
|
@ -207,7 +207,11 @@ class Thread(ChatBaseModel):
|
||||
|
||||
@property
|
||||
def initiator_message(self):
|
||||
try:
|
||||
return self.messages[0] # TODO check if this needs sorting
|
||||
except IndexError:
|
||||
logger.warning(f"Thread {self} is empty. How can that be?")
|
||||
return None
|
||||
|
||||
@property
|
||||
def message_count(self):
|
||||
@ -222,6 +226,9 @@ class Thread(ChatBaseModel):
|
||||
@property
|
||||
def is_fully_processed(self) -> bool:
|
||||
init_message = self.initiator_message
|
||||
if init_message is None:
|
||||
return False
|
||||
|
||||
if init_message.is_processed_override:
|
||||
return True
|
||||
# This override is set, for instance, when no URL was sent at all; the thread is then ignored.
|
||||
|
@ -5,13 +5,13 @@ import os
|
||||
import base64
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
import configuration
|
||||
import json
|
||||
|
||||
config = configuration.parsed["DOWNLOADS"]
|
||||
blacklisted = json.loads(config["blacklisted_href_domains"])
|
||||
|
||||
|
||||
class PDFDownloader:
|
||||
"""Saves a given url. Fills the object it got as a parameter"""
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -19,10 +19,8 @@ class PDFDownloader:
|
||||
running = False
|
||||
|
||||
def start(self):
|
||||
try:
|
||||
self.finish()
|
||||
except:
|
||||
self.logger.info("gecko driver not yet running")
|
||||
self.finish() # clear up
|
||||
|
||||
options = webdriver.FirefoxOptions()
|
||||
options.profile = config["browser_profile_path"]
|
||||
# should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
|
||||
@ -59,10 +57,12 @@ class PDFDownloader:
|
||||
self.start() # relaunch the dl util
|
||||
|
||||
def finish(self):
|
||||
if self.running:
|
||||
self.logger.info("Exiting gecko driver")
|
||||
self.driver.quit()
|
||||
self.running = False
|
||||
|
||||
else:
|
||||
self.logger.info("Gecko driver not yet running")
|
||||
|
||||
def download(self, article_object):
|
||||
sleep_time = 1
|
||||
|
@ -5,6 +5,7 @@ version: "3.9"
|
||||
services:
|
||||
auto_news:
|
||||
build: .
|
||||
image: auto_news:latest
|
||||
volumes:
|
||||
- ${CONTAINER_DATA}:/app/file_storage
|
||||
- ${HOSTS_FILE}:/etc/hosts
|
||||
@ -14,13 +15,17 @@ services:
|
||||
network_mode: host
|
||||
environment:
|
||||
- DISPLAY=$DISPLAY
|
||||
- TERM=xterm-256color # colored logs
|
||||
- COLUMNS=160 # for wider logs
|
||||
- DEBUG=${DEBUG}
|
||||
- CHECK=${CHECK}
|
||||
- UPLOAD=${UPLOAD}
|
||||
- HEADLESS=${HEADLESS}
|
||||
- REDUCEDFETCH=${REDUCEDFETCH}
|
||||
entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile
|
||||
stdin_open: ${INTERACTIVE:-false} # docker run -i
|
||||
tty: ${INTERACTIVE:-false} # docker run -t
|
||||
|
||||
entrypoint: ${ENTRYPOINT:-"python3 runner.py"} # by default launch workers as defined in the Dockerfile
|
||||
|
||||
# geckodriver:
|
||||
# image: selenium/standalone-firefox:100.0
|
||||
|
4
env/check
vendored
4
env/check
vendored
@ -1,7 +1,7 @@
|
||||
# Does not run any downloads but displays the previously downloaded but not yet checked files. Requires display access via xauth
|
||||
|
||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
|
||||
XAUTHORITY=$XAUTHORITY
|
||||
|
||||
|
7
env/debug
vendored
7
env/debug
vendored
@ -1,7 +1,7 @@
|
||||
# Runs in a debugging mode, does not launch anything at all but starts a bash process
|
||||
|
||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
|
||||
CODE=./
|
||||
XAUTHORITY=$XAUTHORITY
|
||||
@ -12,4 +12,5 @@ UPLOAD=false
|
||||
HEADLESS=false
|
||||
REDUCEDFETCH=false
|
||||
|
||||
ENTRYPOINT="sleep infinity"
|
||||
ENTRYPOINT="/bin/bash"
|
||||
INTERACTIVE=true
|
4
env/production
vendored
4
env/production
vendored
@ -1,7 +1,7 @@
|
||||
# Runs on the main slack channel with the full worker setup. If nothing funky has occurred, reducedfetch is a speedup
|
||||
|
||||
CONTAINER_DATA=/mnt/Data/Downloads/auto_news.container
|
||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
|
||||
DEBUG=false
|
||||
CHECK=false
|
||||
|
4
env/upload
vendored
4
env/upload
vendored
@ -1,7 +1,7 @@
|
||||
# Does not run any other workers and only uploads to archive the urls that weren't previously uploaded
|
||||
|
||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||
|
||||
|
||||
DEBUG=false
|
||||
|
@ -1,5 +1,3 @@
|
||||
from cmath import log
|
||||
from concurrent.futures import thread
|
||||
import sys
|
||||
sys.path.append("../app")
|
||||
import runner
|
||||
@ -8,14 +6,15 @@ logger = logging.getLogger()
|
||||
import json
|
||||
|
||||
|
||||
|
||||
logger.info("Overwriting production values for single use media-fetch")
|
||||
logger.info("Overwriting production values for single time media-fetch")
|
||||
runner.configuration.models.set_db(
|
||||
runner.configuration.SqliteDatabase("media_message_dummy.db"), # chat_db (not needed here)
|
||||
runner.configuration.SqliteDatabase("media_downloads.db")
|
||||
runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"), # chat_db (not needed here)
|
||||
runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
|
||||
)
|
||||
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "."
|
||||
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "../.dev/"
|
||||
|
||||
|
||||
def fetch():
|
||||
coordinator = runner.Coordinator()
|
||||
|
||||
|
||||
@ -38,3 +37,16 @@ for u in url_list:
|
||||
dummy_thread = runner.models.Thread()
|
||||
msg = runner.models.Message(text= msg_text, thread=dummy_thread)
|
||||
coordinator.incoming_request(msg)
|
||||
|
||||
|
||||
def show():
|
||||
sel = runner.models.ArticleDownload.select()
|
||||
entries = ["title"] #, "article_url", "archive_url"]
|
||||
|
||||
for e in entries:
|
||||
r = [t.title for t in sel]
|
||||
print(r)
|
||||
# print([t for t in r])
|
||||
|
||||
|
||||
show()
|
||||
|
Loading…
x
Reference in New Issue
Block a user