Switched from geckodriver to chrome
This commit is contained in:
		| @@ -6,10 +6,8 @@ import base64 | ||||
| import requests | ||||
| from selenium import webdriver | ||||
| import configuration | ||||
| import json | ||||
|  | ||||
| config = configuration.main_config["DOWNLOADS"] | ||||
| blacklisted = json.loads(config["blacklisted_href_domains"]) | ||||
|  | ||||
|  | ||||
| class PDFDownloader: | ||||
| @@ -21,42 +19,31 @@ class PDFDownloader: | ||||
|     def start(self): | ||||
|         self.finish() # clear up | ||||
|              | ||||
|         options = webdriver.FirefoxOptions() | ||||
|         options.profile = config["browser_profile_path"] | ||||
|         # should be options.set_preference("profile", config["browser_profile_path"]) as of selenium 4 but that doesn't work | ||||
|         options = webdriver.ChromeOptions() | ||||
|         options.add_argument(f"user-data-dir={config['browser_profile_path']}") | ||||
|         options.add_argument('--headless') | ||||
|  | ||||
|         if os.getenv("DEBUG", "false") == "true": | ||||
|             self.logger.warning("Opening browser GUI because of 'DEBUG=true'") | ||||
|         else: | ||||
|             options.add_argument('--headless') | ||||
|         # if os.getenv("DEBUG", "false") == "true": | ||||
|         #     self.logger.warning("Opening browser GUI because of 'DEBUG=true'") | ||||
|         # else: | ||||
|  | ||||
|         options.set_preference('print.save_as_pdf.links.enabled', True) | ||||
|         # Just save if the filetype is pdf already | ||||
|         # TODO: this is not working right now | ||||
|         # options.set_preference('print.save_as_pdf.links.enabled', True) | ||||
|         # # Just save if the filetype is pdf already | ||||
|         # # TODO: this is not working right now | ||||
|  | ||||
|         options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True) | ||||
|         options.set_preference("browser.download.folderList", 2) | ||||
|         # options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf") | ||||
|         # options.set_preference("pdfjs.disabled", True) | ||||
|         options.set_preference("browser.download.dir", config["default_download_path"]) | ||||
|         # options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True) | ||||
|         # options.set_preference("browser.download.folderList", 2) | ||||
|         # # options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf") | ||||
|         # # options.set_preference("pdfjs.disabled", True) | ||||
|         # options.set_preference("browser.download.dir", config["default_download_path"]) | ||||
|  | ||||
|         self.logger.info("Starting gecko driver") | ||||
|         # previously, in a single docker image: | ||||
|         # self.driver = webdriver.Firefox( | ||||
|         #     options = options, | ||||
|         #     service = webdriver.firefox.service.Service( | ||||
|         #         log_path = f'{config["local_storage_path"]}/geckodriver.log' | ||||
|         # )) | ||||
|         self.logger.info("Starting chrome driver") | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor = 'http://geckodriver:4444', | ||||
|             command_executor = 'http://chrome:4444', # the host chrome points to the chrome container | ||||
|             options = options, | ||||
|             # can't set log path... | ||||
|         ) | ||||
|          | ||||
|         residues = os.listdir(config["default_download_path"]) | ||||
|         for res in residues: | ||||
|             os.remove(os.path.join(config["default_download_path"], res)) | ||||
|  | ||||
|         self.running = True | ||||
|  | ||||
|     def autostart(self): | ||||
| @@ -65,7 +52,7 @@ class PDFDownloader: | ||||
|  | ||||
|     def finish(self): | ||||
|         if self.running: | ||||
|             self.logger.info("Exiting gecko driver") | ||||
|             self.logger.info("Exiting chrome driver") | ||||
|             try: | ||||
|                 self.driver.quit() | ||||
|                 time.sleep(10) | ||||
| @@ -73,7 +60,7 @@ class PDFDownloader: | ||||
|                 self.logger.critical("Connection to the driver broke off") | ||||
|             self.running = False | ||||
|         else: | ||||
|             self.logger.info("Gecko driver not yet running") | ||||
|             self.logger.info("Chrome driver not yet running") | ||||
|  | ||||
|     def download(self, article_object): | ||||
|         sleep_time = 2 | ||||
| @@ -153,8 +140,6 @@ class PDFDownloader: | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def make_path_unique(path): | ||||
|     fname, ending = os.path.splitext(path) | ||||
|     fname += datetime.datetime.now().strftime("%d-%H%M%S") | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
| import youtube_dl | ||||
| import os | ||||
| import logging | ||||
|   | ||||
		Reference in New Issue
	
	Block a user