Switched to docker compose and wasted hours trying to have standalone firefox

This commit is contained in:
Remy Moll
2022-05-29 18:29:31 +02:00
parent 878a1dff5d
commit 54760abee4
13 changed files with 154 additions and 42 deletions

View File

@@ -1,5 +1,4 @@
import os
import sys
import configparser
import logging
from peewee import SqliteDatabase
@@ -19,18 +18,18 @@ logger = logging.getLogger(__name__)
parsed = configparser.ConfigParser()
parsed.read("/app/file_storage/config.ini")
if "debug" in sys.argv:
logger.warning("Running in debugging mode because launched with argument 'debug'")
# parsed.read("/code/config.ini")
if os.getenv("DEBUG", "false") == "true":
logger.warning("Found 'DEBUG=true', setting up dummy databases")
db_base_path = parsed["DATABASE"]["db_path_dev"]
parsed["SLACK"]["archive_id"] = parsed["SLACK"]["debug_id"]
parsed["MAIL"]["recipient"] = parsed["MAIL"]["sender"]
else:
logger.warning("Using production values, I hope you know what you're doing...")
logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...")
db_base_path = parsed["DATABASE"]["db_path_prod"]
from utils_storage import models
# Set up the database

9
app/requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
peewee
selenium
youtube-dl
waybackpy
slack_bolt # relies on slack_sdk
newspaper3k
htmldate
markdown
rich

View File

@@ -1,9 +1,9 @@
"""Main coordination of other util classes. Handles inbound and outbound calls"""
import configuration
models = configuration.models
import sys
from threading import Thread
import logging
import os
logger = logging.getLogger(__name__)
from utils_mail import runner as mail_runner
@@ -172,12 +172,12 @@ if __name__ == "__main__":
coordinator = Coordinator()
if "upload" in sys.argv:
if os.getenv("UPLOAD", "false") == "true":
articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute()
logger.info(f"Launching upload to archive for {len(articles)} articles.")
coordinator.manual_processing(articles, [UploadWorker()])
elif "check" in sys.argv:
elif os.getenv("CHECK", "false") == "true":
from utils_check import runner as check_runner
check_runner.verify_unchecked()

View File

@@ -3,7 +3,6 @@ import configuration
import requests
import os
import time
import sys
from threading import Thread
from slack_sdk.errors import SlackApiError
@@ -30,10 +29,10 @@ def init(client) -> None:
t = Thread(target = fetch_missed_channel_reactions) # threaded, runs in background (usually takes a long time)
t.start()
if "reducedfetch" in sys.argv:
logger.warning("Only fetching empty threads for bot messages because of argument 'reducedfetch'")
if os.getenv("REDUCEDFETCH", "false") == "true":
logger.warning("Only fetching empty threads for bot messages because 'REDUCEDFETCH=true'")
fetch_missed_thread_messages(reduced=True)
else: # perform these two asynchronously
else: # perform both asynchronously
fetch_missed_thread_messages()

View File

@@ -2,7 +2,6 @@ import time
import datetime
import logging
import os
import sys
import base64
import requests
from selenium import webdriver
@@ -20,28 +19,34 @@ class PDFDownloader:
running = False
def start(self):
options=Options()
try:
self.finish()
except:
self.logger.info("gecko driver not yet running")
options = webdriver.FirefoxOptions()
options.profile = config["browser_profile_path"]
if "notheadless" in sys.argv:
self.logger.warning("Opening browser GUI because of Argument 'notheadless'")
else:
# NOTE: as of selenium 4 this should be options.set_preference("profile", config["browser_profile_path"]), but that doesn't work
if os.getenv("HEADLESS", "false") == "true":
options.add_argument('--headless')
else:
self.logger.warning("Opening browser GUI because of 'HEADLESS=true'")
# Print to pdf
options.set_preference("print_printer", "Mozilla Save to PDF")
options.set_preference("print.always_print_silent", True)
options.set_preference("print.show_print_progress", False)
options.set_preference('print.save_as_pdf.links.enabled', True)
# Intended to save the file directly if it is already a PDF; this does not work!
options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
options.set_preference("browser.download.folderList", 2)
# options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
# options.set_preference("pdfjs.disabled", True)
options.set_preference("browser.download.dir", config["default_download_path"])
self.logger.info("Now Starting gecko driver")
self.driver = webdriver.Firefox(options=options)
self.logger.info("Starting gecko driver")
self.driver = webdriver.Firefox(
options = options,
service = webdriver.firefox.service.Service(
log_path = f'{config["local_storage_path"]}/geckodriver.log'
))
residues = os.listdir(config["default_download_path"])
for res in residues:
@@ -54,6 +59,7 @@ class PDFDownloader:
self.start() # relaunch the dl util
def finish(self):
self.logger.info("Exiting gecko driver")
self.driver.quit()
self.running = False