minor corrections
This commit is contained in:
		@@ -28,7 +28,7 @@ class PDFDownloader:
 | 
			
		||||
        if os.getenv("HEADLESS", "false") == "true":
 | 
			
		||||
            options.add_argument('--headless')
 | 
			
		||||
        else:
 | 
			
		||||
            self.logger.warning("Opening browser GUI because of 'HEADLESS=true'")
 | 
			
		||||
            self.logger.warning("Opening browser GUI because of 'HEADLESS=false'")
 | 
			
		||||
 | 
			
		||||
        options.set_preference('print.save_as_pdf.links.enabled', True)
 | 
			
		||||
        # Just save if the filetype is pdf already, does not work!
 | 
			
		||||
@@ -46,7 +46,7 @@ class PDFDownloader:
 | 
			
		||||
        #         log_path = f'{config["local_storage_path"]}/geckodriver.log'
 | 
			
		||||
        # ))
 | 
			
		||||
        self.driver = webdriver.Remote(
 | 
			
		||||
            command_executor = 'http://localhost:4444',
 | 
			
		||||
            command_executor = 'http://geckodriver:4444',
 | 
			
		||||
            options = options,
 | 
			
		||||
            # can't set log path...
 | 
			
		||||
        )
 | 
			
		||||
@@ -64,13 +64,17 @@ class PDFDownloader:
 | 
			
		||||
    def finish(self):
 | 
			
		||||
        if self.running:
 | 
			
		||||
            self.logger.info("Exiting gecko driver")
 | 
			
		||||
            self.driver.quit()
 | 
			
		||||
            try:
 | 
			
		||||
                self.driver.quit()
 | 
			
		||||
                time.sleep(10)
 | 
			
		||||
            except:
 | 
			
		||||
                self.logger.critical("Connection to the driver broke off")
 | 
			
		||||
            self.running = False
 | 
			
		||||
        else:
 | 
			
		||||
            self.logger.info("Gecko driver not yet running")
 | 
			
		||||
 | 
			
		||||
    def download(self, article_object):
 | 
			
		||||
        sleep_time = 1
 | 
			
		||||
        sleep_time = 2
 | 
			
		||||
        self.autostart()
 | 
			
		||||
        url = article_object.article_url
 | 
			
		||||
 | 
			
		||||
@@ -87,7 +91,7 @@ class PDFDownloader:
 | 
			
		||||
        # in the mean time, get a page title if required
 | 
			
		||||
        if article_object.is_title_bad:
 | 
			
		||||
            article_object.title = self.driver.title.replace(".pdf", "")
 | 
			
		||||
            # will be propagated to dst as well
 | 
			
		||||
            # will be propagated to the saved file (dst) as well
 | 
			
		||||
 | 
			
		||||
        fname = article_object.fname_template
 | 
			
		||||
        dst = os.path.join(article_object.save_path, fname)
 | 
			
		||||
@@ -110,7 +114,7 @@ class PDFDownloader:
 | 
			
		||||
        else:
 | 
			
		||||
            article_object.file_name = ""
 | 
			
		||||
        
 | 
			
		||||
        return article_object  # this change is saved later manually
 | 
			
		||||
        return article_object  # this change is saved later by the external caller
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def get_exisiting_pdf(self, url, dst):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,4 @@
 | 
			
		||||
import time
 | 
			
		||||
from waybackpy import WaybackMachineSaveAPI # upload to archive.org
 | 
			
		||||
import logging
 | 
			
		||||
logger = logging.getLogger(__name__)
 | 
			
		||||
@@ -11,6 +12,8 @@ def upload_to_archive(article_object):
 | 
			
		||||
        archive_url = wayback.save()
 | 
			
		||||
        # logger.info(f"{url} uploaded to archive successfully")
 | 
			
		||||
        article_object.archive_url = archive_url
 | 
			
		||||
        # time.sleep(4) # Archive Uploads rate limited to 15/minute
 | 
			
		||||
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        article_object.archive_url = "Error while uploading: {}".format(e)
 | 
			
		||||
        logger.error(f"Error while generating archive url: {e}")
 | 
			
		||||
 
 | 
			
		||||
@@ -43,11 +43,15 @@ class FetchWorker(TemplateWorker):
 | 
			
		||||
class UploadWorker(TemplateWorker):
 | 
			
		||||
    def __init__(self) -> None:
 | 
			
		||||
        super().__init__()
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def _handle_article(self, article_watcher):
 | 
			
		||||
        action = run_upload # function
 | 
			
		||||
        def action(*args, **kwargs):
 | 
			
		||||
            run_upload(*args, **kwargs)
 | 
			
		||||
            time.sleep(5) # uploads to archive are throttled to 15/minute
 | 
			
		||||
 | 
			
		||||
        super()._handle_article(article_watcher, action)
 | 
			
		||||
        time.sleep(4) # Archive Uploads rate limited to 15/minute
 | 
			
		||||
        article_watcher.upload_completed = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user