FS updates and corrections

2022-06-15 11:14:08 +02:00
parent 54760abee4
commit 87d65fc988
14 changed files with 91 additions and 56 deletions
--- a/app/configuration.py
+++ b/app/configuration.py
@@ -8,7 +8,7 @@ from rich.logging import RichHandler
 logging.basicConfig(
    format='%(message)s',
    level=logging.INFO,
-    datefmt='%Y-%m-%d %H:%M:%S',
+    datefmt='%H:%M:%S', # add %Y-%m-%d if needed
    handlers=[RichHandler()]
    )
 logger = logging.getLogger(__name__)
--- a/app/utils_check/runner.py
+++ b/app/utils_check/runner.py
@@ -158,10 +158,11 @@ def verify_unchecked():
        
        try:
            # close any previously opened windows:
-            subprocess.call("killall evince")
+            subprocess.call(["kill", "`pgrep evince`"])
            # then open a new one
            subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # suppress evince gtk warnings
+            print("done")
        except Exception as e:
            print(e)
            continue
--- a/app/utils_storage/models.py
+++ b/app/utils_storage/models.py
@@ -207,7 +207,11 @@ class Thread(ChatBaseModel):

    @property
    def initiator_message(self):
-        return self.messages[0] # TODO check if this needs sorting
+        try:
+            return self.messages[0] # TODO check if this needs sorting
+        except IndexError:
+            logger.warning(f"Thread {self} is empty. How can that be?")
+            return None

    @property
    def message_count(self):
@@ -222,6 +226,9 @@ class Thread(ChatBaseModel):
    @property
    def is_fully_processed(self) -> bool:
        init_message = self.initiator_message
+        if init_message is None:
+            return False
+        
        if init_message.is_processed_override:
            return True
        # this override is set, for instance, when no url was sent at all; the thread is then set to be ignored
--- a/app/utils_worker/download/browser.py
+++ b/app/utils_worker/download/browser.py
@@ -5,13 +5,13 @@ import os
 import base64
 import requests
 from selenium import webdriver
-from selenium.webdriver.firefox.options import Options
 import configuration
 import json

 config = configuration.parsed["DOWNLOADS"]
 blacklisted = json.loads(config["blacklisted_href_domains"])

+
 class PDFDownloader:
    """Saves a given url. Fills the object it got as a parameter"""
    logger = logging.getLogger(__name__)
@@ -19,10 +19,8 @@ class PDFDownloader:
    running = False
    
    def start(self):
-        try:
-            self.finish()
-        except:
-            self.logger.info("gecko driver not yet running")
+        self.finish() # clear up
+            
        options = webdriver.FirefoxOptions()
        options.profile = config["browser_profile_path"]
        # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work
@@ -56,13 +54,15 @@ class PDFDownloader:

    def autostart(self):
        if not self.running:
-            self.start() # relaunch the dl util    
+            self.start()  # relaunch the dl util

    def finish(self):
-        self.logger.info("Exiting gecko driver")
-        self.driver.quit()
-        self.running = False
-
+        if self.running:
+            self.logger.info("Exiting gecko driver")
+            self.driver.quit()
+            self.running = False
+        else:
+            self.logger.info("Gecko driver not yet running")

    def download(self, article_object):
        sleep_time = 1
@@ -74,14 +74,14 @@ class PDFDownloader:
        except Exception as e:
            self.logger.critical("Selenium .get(url) failed with error {}".format(e))
            self.finish()
-            return article_object # without changes
+            return article_object  # without changes
        
        time.sleep(sleep_time)
        # leave the page time to do any funky business

        # in the mean time, get a page title if required
        if article_object.is_title_bad:
-            article_object.title = self.driver.title.replace(".pdf","")
+            article_object.title = self.driver.title.replace(".pdf", "")
            # will be propagated to dst as well

        fname = article_object.fname_template
@@ -105,7 +105,7 @@ class PDFDownloader:
        else:
            article_object.file_name = ""
        
-        return article_object # this change is saved later manually
+        return article_object  # this change is saved later manually


    def get_exisiting_pdf(self, url, dst):