Working and up to date. WIP: miscellaneous manual actions.

This commit is contained in:
Remy Moll
2022-05-24 18:37:30 +02:00
parent 246729d376
commit 878a1dff5d
14 changed files with 142 additions and 182 deletions

View File

@@ -2,6 +2,7 @@ import time
import datetime
import logging
import os
import sys
import base64
import requests
from selenium import webdriver
@@ -21,18 +22,19 @@ class PDFDownloader:
def start(self):
options=Options()
options.profile = config["browser_profile_path"]
# TODO: Get headless mode interactively
options.add_argument('--headless')
# options.add_argument("--disable-infobars")
# options.set_preference("javascript.enabled", False)
# options.add_argument("--disable-popup-blocking")
if "notheadless" in sys.argv:
self.logger.warning("Opening browser GUI because of Argument 'notheadless'")
else:
options.add_argument('--headless')
# Print to pdf
options.set_preference("print_printer", "Mozilla Save to PDF")
options.set_preference("print.always_print_silent", True)
options.set_preference("print.show_print_progress", False)
options.set_preference('print.save_as_pdf.links.enabled', True)
# Just save if the filetype is pdf already, does not work!
options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
# Save existing pdf
options.set_preference("browser.download.folderList", 2)
# options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
# options.set_preference("pdfjs.disabled", True)
@@ -140,7 +142,7 @@ class PDFDownloader:
hrefs = [h for h in hrefs \
if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0)
] # filter a tiny bit at least
self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})")
self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})")
return hrefs