Switched to docker compose and wasted hours trying to have standalone firefox
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
import sys
|
||||
import configparser
|
||||
import logging
|
||||
from peewee import SqliteDatabase
|
||||
@@ -19,18 +18,18 @@ logger = logging.getLogger(__name__)
|
||||
parsed = configparser.ConfigParser()
|
||||
parsed.read("/app/file_storage/config.ini")
|
||||
|
||||
if "debug" in sys.argv:
|
||||
logger.warning("Running in debugging mode because launched with argument 'debug'")
|
||||
# parsed.read("/code/config.ini")
|
||||
if os.getenv("DEBUG", "false") == "true":
|
||||
logger.warning("Found 'DEBUG=true', setting up dummy databases")
|
||||
|
||||
db_base_path = parsed["DATABASE"]["db_path_dev"]
|
||||
parsed["SLACK"]["archive_id"] = parsed["SLACK"]["debug_id"]
|
||||
parsed["MAIL"]["recipient"] = parsed["MAIL"]["sender"]
|
||||
else:
|
||||
logger.warning("Using production values, I hope you know what you're doing...")
|
||||
logger.warning("Found 'DEBUG=false' and running on production databases, I hope you know what you're doing...")
|
||||
|
||||
db_base_path = parsed["DATABASE"]["db_path_prod"]
|
||||
|
||||
|
||||
from utils_storage import models
|
||||
|
||||
# Set up the database
|
||||
|
9
app/requirements.txt
Normal file
9
app/requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
peewee
|
||||
selenium
|
||||
youtube-dl
|
||||
waybackpy
|
||||
slack_bolt # relies on slack_sdk
|
||||
newspaper3k
|
||||
htmldate
|
||||
markdown
|
||||
rich
|
@@ -1,9 +1,9 @@
|
||||
"""Main coordination of other util classes. Handles inbound and outbound calls"""
|
||||
import configuration
|
||||
models = configuration.models
|
||||
import sys
|
||||
from threading import Thread
|
||||
import logging
|
||||
import os
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from utils_mail import runner as mail_runner
|
||||
@@ -172,12 +172,12 @@ if __name__ == "__main__":
|
||||
coordinator = Coordinator()
|
||||
|
||||
|
||||
if "upload" in sys.argv:
|
||||
if os.getenv("UPLOAD", "false") == "true":
|
||||
articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute()
|
||||
logger.info(f"Launching upload to archive for {len(articles)} articles.")
|
||||
coordinator.manual_processing(articles, [UploadWorker()])
|
||||
|
||||
elif "check" in sys.argv:
|
||||
elif os.getenv("CHECK", "false") == "true":
|
||||
from utils_check import runner as check_runner
|
||||
check_runner.verify_unchecked()
|
||||
|
||||
|
@@ -3,7 +3,6 @@ import configuration
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
from threading import Thread
|
||||
from slack_sdk.errors import SlackApiError
|
||||
|
||||
@@ -30,10 +29,10 @@ def init(client) -> None:
|
||||
t = Thread(target = fetch_missed_channel_reactions) # threaded, runs in background (usually takes a long time)
|
||||
t.start()
|
||||
|
||||
if "reducedfetch" in sys.argv:
|
||||
logger.warning("Only fetching empty threads for bot messages because of argument 'reducedfetch'")
|
||||
if os.getenv("REDUCEDFETCH", "false") == "true":
|
||||
logger.warning("Only fetching empty threads for bot messages because 'REDUCEDFETCH=true'")
|
||||
fetch_missed_thread_messages(reduced=True)
|
||||
else: # perform these two asynchronously
|
||||
else: # perform both asynchronously
|
||||
fetch_missed_thread_messages()
|
||||
|
||||
|
||||
|
@@ -2,7 +2,6 @@ import time
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import base64
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
@@ -20,28 +19,34 @@ class PDFDownloader:
|
||||
running = False
|
||||
|
||||
def start(self):
|
||||
options=Options()
|
||||
try:
|
||||
self.finish()
|
||||
except:
|
||||
self.logger.info("gecko driver not yet running")
|
||||
options = webdriver.FirefoxOptions()
|
||||
options.profile = config["browser_profile_path"]
|
||||
if "notheadless" in sys.argv:
|
||||
self.logger.warning("Opening browser GUI because of Argument 'notheadless'")
|
||||
else:
|
||||
# should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
|
||||
|
||||
if os.getenv("HEADLESS", "false") == "true":
|
||||
options.add_argument('--headless')
|
||||
else:
|
||||
# This branch runs only when HEADLESS is not "true", so report the setting that actually applies.
self.logger.warning("Opening browser GUI because of 'HEADLESS=false'")
|
||||
|
||||
# Print to pdf
|
||||
options.set_preference("print_printer", "Mozilla Save to PDF")
|
||||
options.set_preference("print.always_print_silent", True)
|
||||
options.set_preference("print.show_print_progress", False)
|
||||
options.set_preference('print.save_as_pdf.links.enabled', True)
|
||||
|
||||
# Just save if the filetype is pdf already, does not work!
|
||||
|
||||
options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
|
||||
options.set_preference("browser.download.folderList", 2)
|
||||
# options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
|
||||
# options.set_preference("pdfjs.disabled", True)
|
||||
options.set_preference("browser.download.dir", config["default_download_path"])
|
||||
|
||||
self.logger.info("Now Starting gecko driver")
|
||||
self.driver = webdriver.Firefox(options=options)
|
||||
self.logger.info("Starting gecko driver")
|
||||
self.driver = webdriver.Firefox(
|
||||
options = options,
|
||||
service = webdriver.firefox.service.Service(
|
||||
log_path = f'{config["local_storage_path"]}/geckodriver.log'
|
||||
))
|
||||
|
||||
residues = os.listdir(config["default_download_path"])
|
||||
for res in residues:
|
||||
@@ -54,6 +59,7 @@ class PDFDownloader:
|
||||
self.start() # relaunch the dl util
|
||||
|
||||
def finish(self):
|
||||
self.logger.info("Exiting gecko driver")
|
||||
self.driver.quit()
|
||||
self.running = False
|
||||
|
||||
|
Reference in New Issue
Block a user