minor corrections

This commit is contained in:
Remy Moll
2022-06-23 15:05:59 +02:00
parent ac9e988af3
commit 12a7de91ed
13 changed files with 218 additions and 32 deletions

View File

@@ -28,7 +28,7 @@ class PDFDownloader:
if os.getenv("HEADLESS", "false") == "true":
options.add_argument('--headless')
else:
self.logger.warning("Opening browser GUI because of 'HEADLESS=true'")
self.logger.warning("Opening browser GUI because of 'HEADLESS=false'")
options.set_preference('print.save_as_pdf.links.enabled', True)
# Just save if the filetype is pdf already, does not work!
@@ -46,7 +46,7 @@ class PDFDownloader:
# log_path = f'{config["local_storage_path"]}/geckodriver.log'
# ))
self.driver = webdriver.Remote(
command_executor = 'http://localhost:4444',
command_executor = 'http://geckodriver:4444',
options = options,
# can't set log path...
)
@@ -64,13 +64,17 @@ class PDFDownloader:
def finish(self):
if self.running:
self.logger.info("Exiting gecko driver")
self.driver.quit()
try:
self.driver.quit()
time.sleep(10)
except:
self.logger.critical("Connection to the driver broke off")
self.running = False
else:
self.logger.info("Gecko driver not yet running")
def download(self, article_object):
sleep_time = 1
sleep_time = 2
self.autostart()
url = article_object.article_url
@@ -87,7 +91,7 @@ class PDFDownloader:
# in the mean time, get a page title if required
if article_object.is_title_bad:
article_object.title = self.driver.title.replace(".pdf", "")
# will be propagated to dst as well
# will be propagated to the saved file (dst) as well
fname = article_object.fname_template
dst = os.path.join(article_object.save_path, fname)
@@ -110,7 +114,7 @@ class PDFDownloader:
else:
article_object.file_name = ""
return article_object # this change is saved later manually
return article_object # this change is saved later by the external caller
def get_exisiting_pdf(self, url, dst):