FS updates and corrections
This commit is contained in:
parent
54760abee4
commit
87d65fc988
4
.vscode/settings.json
vendored
Normal file
4
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"python.linting.flake8Enabled": true,
|
||||||
|
"python.linting.enabled": false
|
||||||
|
}
|
@ -2,7 +2,6 @@ FROM python:latest
|
|||||||
|
|
||||||
ENV TZ Euopre/Zurich
|
ENV TZ Euopre/Zurich
|
||||||
|
|
||||||
|
|
||||||
RUN echo "deb http://deb.debian.org/debian/ unstable main contrib non-free" >> /etc/apt/sources.list
|
RUN echo "deb http://deb.debian.org/debian/ unstable main contrib non-free" >> /etc/apt/sources.list
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
evince \
|
evince \
|
||||||
|
@ -49,9 +49,15 @@ I also wrote a rudimentary docker compose file which makes running much more sim
|
|||||||
|
|
||||||
All relevant passthroughs and mounts are specified through the env-file, for which I configured 4 versions: production, debug (development in general), upload and check. These files will have to be adapted to your individual setup but can be reused more easily.
|
All relevant passthroughs and mounts are specified through the env-file, for which I configured 4 versions: production, debug (development in general), upload and check. These files will have to be adapted to your individual setup but can be reused more easily.
|
||||||
|
|
||||||
> Note:
|
For the debug env-file, you will likely want interactivity, so you need to run:
|
||||||
|
|
||||||
|
`docker compose --env-file env/debug run auto_news`
|
||||||
|
|
||||||
|
<!-- > Note:
|
||||||
>
|
>
|
||||||
> The `debug` env-file requires additional input. Once `docker compose up` is running, in a new session run `docker compose --env-file env/debug exec bash`. The live-mounted code is then under `/code`. Note that the `DEBUG=true` environment variable is still set. If you want to test things on production, run `export DEBUG=false`.
|
> The `debug` env-file requires additional input. Once `docker compose up` is running, in a new session run `docker compose --env-file env/debug exec bash`. The live-mounted code is then under `/code`. Note that the `DEBUG=true` environment variable is still set. If you want to test things on production, run `export DEBUG=false`.
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
@ -8,7 +8,7 @@ from rich.logging import RichHandler
|
|||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format='%(message)s',
|
format='%(message)s',
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
datefmt='%Y-%m-%d %H:%M:%S',
|
datefmt='%H:%M:%S', # add %Y-%m-%d if needed
|
||||||
handlers=[RichHandler()]
|
handlers=[RichHandler()]
|
||||||
)
|
)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -158,10 +158,11 @@ def verify_unchecked():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# close any previously opened windows:
|
# close any previously opened windows:
|
||||||
subprocess.call("killall evince")
|
subprocess.call(["kill", "`pgrep evince`"])
|
||||||
# then open a new one
|
# then open a new one
|
||||||
subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
# suppress evince gtk warnings
|
# suppress evince gtk warnings
|
||||||
|
print("done")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
continue
|
continue
|
||||||
|
@ -207,7 +207,11 @@ class Thread(ChatBaseModel):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def initiator_message(self):
|
def initiator_message(self):
|
||||||
|
try:
|
||||||
return self.messages[0] # TODO check if this needs sorting
|
return self.messages[0] # TODO check if this needs sorting
|
||||||
|
except IndexError:
|
||||||
|
logger.warning(f"Thread {self} is empty. How can that be?")
|
||||||
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def message_count(self):
|
def message_count(self):
|
||||||
@ -222,6 +226,9 @@ class Thread(ChatBaseModel):
|
|||||||
@property
|
@property
|
||||||
def is_fully_processed(self) -> bool:
|
def is_fully_processed(self) -> bool:
|
||||||
init_message = self.initiator_message
|
init_message = self.initiator_message
|
||||||
|
if init_message is None:
|
||||||
|
return False
|
||||||
|
|
||||||
if init_message.is_processed_override:
|
if init_message.is_processed_override:
|
||||||
return True
|
return True
|
||||||
# this override is set for instance, when no url was sent at all. Then set this thread to be ignored
|
# this override is set for instance, when no url was sent at all. Then set this thread to be ignored
|
||||||
|
@ -5,13 +5,13 @@ import os
|
|||||||
import base64
|
import base64
|
||||||
import requests
|
import requests
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.firefox.options import Options
|
|
||||||
import configuration
|
import configuration
|
||||||
import json
|
import json
|
||||||
|
|
||||||
config = configuration.parsed["DOWNLOADS"]
|
config = configuration.parsed["DOWNLOADS"]
|
||||||
blacklisted = json.loads(config["blacklisted_href_domains"])
|
blacklisted = json.loads(config["blacklisted_href_domains"])
|
||||||
|
|
||||||
|
|
||||||
class PDFDownloader:
|
class PDFDownloader:
|
||||||
"""Saves a given url. Fills the object it got as a parameter"""
|
"""Saves a given url. Fills the object it got as a parameter"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -19,10 +19,8 @@ class PDFDownloader:
|
|||||||
running = False
|
running = False
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
try:
|
self.finish() # clear up
|
||||||
self.finish()
|
|
||||||
except:
|
|
||||||
self.logger.info("gecko driver not yet running")
|
|
||||||
options = webdriver.FirefoxOptions()
|
options = webdriver.FirefoxOptions()
|
||||||
options.profile = config["browser_profile_path"]
|
options.profile = config["browser_profile_path"]
|
||||||
# should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
|
# should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
|
||||||
@ -59,10 +57,12 @@ class PDFDownloader:
|
|||||||
self.start() # relaunch the dl util
|
self.start() # relaunch the dl util
|
||||||
|
|
||||||
def finish(self):
|
def finish(self):
|
||||||
|
if self.running:
|
||||||
self.logger.info("Exiting gecko driver")
|
self.logger.info("Exiting gecko driver")
|
||||||
self.driver.quit()
|
self.driver.quit()
|
||||||
self.running = False
|
self.running = False
|
||||||
|
else:
|
||||||
|
self.logger.info("Gecko driver not yet running")
|
||||||
|
|
||||||
def download(self, article_object):
|
def download(self, article_object):
|
||||||
sleep_time = 1
|
sleep_time = 1
|
||||||
|
@ -5,6 +5,7 @@ version: "3.9"
|
|||||||
services:
|
services:
|
||||||
auto_news:
|
auto_news:
|
||||||
build: .
|
build: .
|
||||||
|
image: auto_news:latest
|
||||||
volumes:
|
volumes:
|
||||||
- ${CONTAINER_DATA}:/app/file_storage
|
- ${CONTAINER_DATA}:/app/file_storage
|
||||||
- ${HOSTS_FILE}:/etc/hosts
|
- ${HOSTS_FILE}:/etc/hosts
|
||||||
@ -14,13 +15,17 @@ services:
|
|||||||
network_mode: host
|
network_mode: host
|
||||||
environment:
|
environment:
|
||||||
- DISPLAY=$DISPLAY
|
- DISPLAY=$DISPLAY
|
||||||
|
- TERM=xterm-256color # colored logs
|
||||||
|
- COLUMNS=160 # for wider logs
|
||||||
- DEBUG=${DEBUG}
|
- DEBUG=${DEBUG}
|
||||||
- CHECK=${CHECK}
|
- CHECK=${CHECK}
|
||||||
- UPLOAD=${UPLOAD}
|
- UPLOAD=${UPLOAD}
|
||||||
- HEADLESS=${HEADLESS}
|
- HEADLESS=${HEADLESS}
|
||||||
- REDUCEDFETCH=${REDUCEDFETCH}
|
- REDUCEDFETCH=${REDUCEDFETCH}
|
||||||
|
entrypoint: ${ENTRYPOINT:-python3 runner.py} # by default launch workers as defined in the Dockerfile
|
||||||
|
stdin_open: ${INTERACTIVE:-false} # docker run -i
|
||||||
|
tty: ${INTERACTIVE:-false} # docker run -t
|
||||||
|
|
||||||
entrypoint: ${ENTRYPOINT:-"python3 runner.py"} # by default launch workers as defined in the Dockerfile
|
|
||||||
|
|
||||||
# geckodriver:
|
# geckodriver:
|
||||||
# image: selenium/standalone-firefox:100.0
|
# image: selenium/standalone-firefox:100.0
|
||||||
|
4
env/check
vendored
4
env/check
vendored
@ -1,7 +1,7 @@
|
|||||||
# Does not run any downloads but displays the previously downloaded but not yet checked files. Requires display access via xauth
|
# Does not run any downloads but displays the previously downloaded but not yet checked files. Requires display access via xauth
|
||||||
|
|
||||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||||
|
|
||||||
XAUTHORTIY=$XAUTHORTIY
|
XAUTHORTIY=$XAUTHORTIY
|
||||||
|
|
||||||
|
7
env/debug
vendored
7
env/debug
vendored
@ -1,7 +1,7 @@
|
|||||||
# Runs in a debugging mode, does not launch anything at all but starts a bash process
|
# Runs in a debugging mode, does not launch anything at all but starts a bash process
|
||||||
|
|
||||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||||
|
|
||||||
CODE=./
|
CODE=./
|
||||||
XAUTHORTIY=$XAUTHORTIY
|
XAUTHORTIY=$XAUTHORTIY
|
||||||
@ -12,4 +12,5 @@ UPLOAD=false
|
|||||||
HEADLESS=false
|
HEADLESS=false
|
||||||
REDUCEDFETCH=false
|
REDUCEDFETCH=false
|
||||||
|
|
||||||
ENTRYPOINT="sleep infinity"
|
ENTRYPOINT="/bin/bash"
|
||||||
|
INTERACTIVE=true
|
4
env/production
vendored
4
env/production
vendored
@ -1,7 +1,7 @@
|
|||||||
# Runs on the main slack channel with the full worker setup. If nothing funky has occurred, reducedfetch is a speedup
|
# Runs on the main slack channel with the full worker setup. If nothing funky has occurred, reducedfetch is a speedup
|
||||||
|
|
||||||
CONTAINER_DATA=/mnt/Data/Downloads/auto_news.container
|
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||||
|
|
||||||
DEBUG=false
|
DEBUG=false
|
||||||
CHECK=false
|
CHECK=false
|
||||||
|
4
env/upload
vendored
4
env/upload
vendored
@ -1,7 +1,7 @@
|
|||||||
# Does not run any other workers and only uploads to the archive the urls that weren't previously uploaded
|
# Does not run any other workers and only uploads to the archive the urls that weren't previously uploaded
|
||||||
|
|
||||||
CONTAINER_DATA=/mnt/Data/COSS/Downloads/auto_news.container
|
CONTAINER_DATA=~/Bulk/COSS/Downloads/auto_news.container
|
||||||
HOSTS_FILE=/mnt/Data/COSS/Downloads/auto_news.container/dependencies/hosts
|
HOSTS_FILE=~/Bulk/COSS/Downloads/auto_news.container/dependencies/hosts
|
||||||
|
|
||||||
|
|
||||||
DEBUG=false
|
DEBUG=false
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
from cmath import log
|
|
||||||
from concurrent.futures import thread
|
|
||||||
import sys
|
import sys
|
||||||
sys.path.append("../app")
|
sys.path.append("../app")
|
||||||
import runner
|
import runner
|
||||||
@ -8,14 +6,15 @@ logger = logging.getLogger()
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
logger.info("Overwriting production values for single time media-fetch")
|
||||||
logger.info("Overwriting production values for single use media-fetch")
|
|
||||||
runner.configuration.models.set_db(
|
runner.configuration.models.set_db(
|
||||||
runner.configuration.SqliteDatabase("media_message_dummy.db"), # chat_db (not needed here)
|
runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"), # chat_db (not needed here)
|
||||||
runner.configuration.SqliteDatabase("media_downloads.db")
|
runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
|
||||||
)
|
)
|
||||||
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "."
|
runner.configuration.parsed["DOWNLOADS"]["local_storage_path"] = "../.dev/"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch():
|
||||||
coordinator = runner.Coordinator()
|
coordinator = runner.Coordinator()
|
||||||
|
|
||||||
|
|
||||||
@ -38,3 +37,16 @@ for u in url_list:
|
|||||||
dummy_thread = runner.models.Thread()
|
dummy_thread = runner.models.Thread()
|
||||||
msg = runner.models.Message(text= msg_text, thread=dummy_thread)
|
msg = runner.models.Message(text= msg_text, thread=dummy_thread)
|
||||||
coordinator.incoming_request(msg)
|
coordinator.incoming_request(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def show():
|
||||||
|
sel = runner.models.ArticleDownload.select()
|
||||||
|
entries = ["title"] #, "article_url", "archive_url"]
|
||||||
|
|
||||||
|
for e in entries:
|
||||||
|
r = [t.title for t in sel]
|
||||||
|
print(r)
|
||||||
|
# print([t for t in r])
|
||||||
|
|
||||||
|
|
||||||
|
show()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user