FS updates and corrections
This commit is contained in:
		| @@ -5,13 +5,13 @@ import os | ||||
| import base64 | ||||
| import requests | ||||
| from selenium import webdriver | ||||
| from selenium.webdriver.firefox.options import Options | ||||
| import configuration | ||||
| import json | ||||
|  | ||||
| config = configuration.parsed["DOWNLOADS"] | ||||
| blacklisted = json.loads(config["blacklisted_href_domains"]) | ||||
|  | ||||
|  | ||||
| class PDFDownloader: | ||||
|     """Saves a given url. Fills the object it got as a parameter""" | ||||
|     logger = logging.getLogger(__name__) | ||||
| @@ -19,10 +19,8 @@ class PDFDownloader: | ||||
|     running = False | ||||
|      | ||||
|     def start(self): | ||||
|         try: | ||||
|             self.finish() | ||||
|         except: | ||||
|             self.logger.info("gecko driver not yet running") | ||||
|         self.finish() # clear up | ||||
|              | ||||
|         options = webdriver.FirefoxOptions() | ||||
|         options.profile = config["browser_profile_path"] | ||||
|         # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work | ||||
| @@ -56,13 +54,15 @@ class PDFDownloader: | ||||
|  | ||||
|     def autostart(self): | ||||
|         if not self.running: | ||||
|             self.start() # relaunch the dl util     | ||||
|             self.start()  # relaunch the dl util | ||||
|  | ||||
|     def finish(self): | ||||
|         self.logger.info("Exiting gecko driver") | ||||
|         self.driver.quit() | ||||
|         self.running = False | ||||
|  | ||||
|         if self.running: | ||||
|             self.logger.info("Exiting gecko driver") | ||||
|             self.driver.quit() | ||||
|             self.running = False | ||||
|         else: | ||||
|             self.logger.info("Gecko driver not yet running") | ||||
|  | ||||
|     def download(self, article_object): | ||||
|         sleep_time = 1 | ||||
| @@ -74,14 +74,14 @@ class PDFDownloader: | ||||
|         except Exception as e: | ||||
|             self.logger.critical("Selenium .get(url) failed with error {}".format(e)) | ||||
|             self.finish() | ||||
|             return article_object # without changes | ||||
|             return article_object  # without changes | ||||
|          | ||||
|         time.sleep(sleep_time) | ||||
|         # leave the page time to do any funky business | ||||
|  | ||||
|         # in the mean time, get a page title if required | ||||
|         if article_object.is_title_bad: | ||||
|             article_object.title = self.driver.title.replace(".pdf","") | ||||
|             article_object.title = self.driver.title.replace(".pdf", "") | ||||
|             # will be propagated to dst as well | ||||
|  | ||||
|         fname = article_object.fname_template | ||||
| @@ -105,7 +105,7 @@ class PDFDownloader: | ||||
|         else: | ||||
|             article_object.file_name = "" | ||||
|          | ||||
|         return article_object # this change is saved later manually | ||||
|         return article_object  # this change is saved later manually | ||||
|  | ||||
|  | ||||
|     def get_exisiting_pdf(self, url, dst): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remy Moll
					Remy Moll