Working and up to date. WIP: miscellaneous manual actions.
This commit is contained in:
		| @@ -2,6 +2,7 @@ import time | ||||
| import datetime | ||||
| import logging | ||||
| import os | ||||
| import sys | ||||
| import base64 | ||||
| import requests | ||||
| from selenium import webdriver | ||||
| @@ -21,18 +22,19 @@ class PDFDownloader: | ||||
|     def start(self): | ||||
|         options=Options() | ||||
|         options.profile = config["browser_profile_path"] | ||||
|         # TODO: Get headless mode interactively | ||||
|         options.add_argument('--headless') | ||||
|         # options.add_argument("--disable-infobars") | ||||
|         # options.set_preference("javascript.enabled", False) | ||||
|         # options.add_argument("--disable-popup-blocking") | ||||
|         if "notheadless" in sys.argv: | ||||
|             self.logger.warning("Opening browser GUI because of Argument 'notheadless'") | ||||
|         else: | ||||
|             options.add_argument('--headless') | ||||
|  | ||||
|         # Print to pdf | ||||
|         options.set_preference("print_printer", "Mozilla Save to PDF") | ||||
|         options.set_preference("print.always_print_silent", True) | ||||
|         options.set_preference("print.show_print_progress", False) | ||||
|         options.set_preference('print.save_as_pdf.links.enabled', True) | ||||
|  | ||||
|         # Save the file directly if it is already a PDF (this does not work yet!) | ||||
|         options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True) | ||||
|         # Save existing pdf | ||||
|         options.set_preference("browser.download.folderList", 2) | ||||
|         # options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf") | ||||
|         # options.set_preference("pdfjs.disabled", True) | ||||
| @@ -140,7 +142,7 @@ class PDFDownloader: | ||||
|         hrefs = [h for h in hrefs \ | ||||
|             if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0) | ||||
|             ] # filter a tiny bit at least | ||||
|         self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})") | ||||
|         self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})") | ||||
|         return hrefs | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remy Moll
					Remy Moll