Working and up to date. WIP: miscellaneous manual actions.
This commit is contained in:
@@ -2,6 +2,7 @@ import time
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import base64
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
@@ -21,18 +22,19 @@ class PDFDownloader:
|
||||
def start(self):
|
||||
options=Options()
|
||||
options.profile = config["browser_profile_path"]
|
||||
# TODO: Get headless mode interactively
|
||||
options.add_argument('--headless')
|
||||
# options.add_argument("--disable-infobars")
|
||||
# options.set_preference("javascript.enabled", False)
|
||||
# options.add_argument("--disable-popup-blocking")
|
||||
if "notheadless" in sys.argv:
|
||||
self.logger.warning("Opening browser GUI because of Argument 'notheadless'")
|
||||
else:
|
||||
options.add_argument('--headless')
|
||||
|
||||
# Print to pdf
|
||||
options.set_preference("print_printer", "Mozilla Save to PDF")
|
||||
options.set_preference("print.always_print_silent", True)
|
||||
options.set_preference("print.show_print_progress", False)
|
||||
options.set_preference('print.save_as_pdf.links.enabled', True)
|
||||
|
||||
# Intended to simply save the file when it is already a PDF -- this does not work!
|
||||
options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
|
||||
# Save existing pdf
|
||||
options.set_preference("browser.download.folderList", 2)
|
||||
# options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
|
||||
# options.set_preference("pdfjs.disabled", True)
|
||||
@@ -140,7 +142,7 @@ class PDFDownloader:
|
||||
hrefs = [h for h in hrefs \
|
||||
if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0)
|
||||
] # filter a tiny bit at least
|
||||
self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})")
|
||||
self.logger.info(f"Hrefs filtered (before: {len_old}, after: {len(hrefs)})")
|
||||
return hrefs
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user