bug fixes
This commit is contained in:
@@ -1,47 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import configuration
|
||||
config = configuration.main_config["DOWNLOADS"]
|
||||
|
||||
# Per-call size reductions in bytes (initial size minus final size), appended
# by shrink_pdf and averaged for its "Avg shrinkage" log message.
shrink_sizes = []
|
||||
|
||||
def shrink_pdf(article):
    """Compress an article's PDF in place using Ghostscript.

    The file at ``article.save_path / article.file_name`` is rewritten by
    ``gs`` with the ``/screen`` preset into a temporary file, which then
    replaces the original. Files that are not PDFs are left untouched.

    Args:
        article: Object exposing ``save_path`` and ``file_name`` attributes.

    Returns:
        The same ``article`` object, whether or not compression took place.
    """
    article_loc = Path(article.save_path) / article.file_name

    # Path.suffix includes the leading dot (".pdf"); comparing against "pdf"
    # never matched, so every file used to be skipped.
    if article_loc.suffix.lower() != ".pdf":
        return article  # it probably was a youtube video

    initial_size = article_loc.stat().st_size
    compressed_tmp = Path(config["default_download_path"]) / "compressed.pdf"

    c = subprocess.run(
        [
            "gs",
            "-sDEVICE=pdfwrite",
            "-dPDFSETTINGS=/screen",
            "-dNOPAUSE",
            "-dBATCH",
            f"-sOutputFile={compressed_tmp}",
            f"{article_loc}",
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if c.returncode != 0:
        # Ghostscript output is not guaranteed to be valid UTF-8.
        stderr_text = c.stderr.decode(errors="replace")
        stdout_text = c.stdout.decode(errors="replace")
        logger.error(f"Could not run the compression! {stderr_text} - {stdout_text}")
        return article

    try:
        os.replace(compressed_tmp, article_loc)
    except OSError as e:
        logger.error(f"Compression ran but I could not copy back the file {e}")
        # The original file is unchanged, so do not record a bogus
        # zero-byte shrinkage or report success.
        return article

    final_size = article_loc.stat().st_size
    shrink_sizes.append(initial_size - final_size)
    logger.info(f"Compression worked. Avg shrinkage: {int(sum(shrink_sizes)/len(shrink_sizes) / 1000)} KB")

    return article
|
@@ -85,10 +85,8 @@ class PDFDownloader:
|
||||
# will be propagated to the saved file (dst) as well
|
||||
|
||||
fname = article_object.fname_template
|
||||
fname = ensure_unique(article_object.save_path, fname)
|
||||
dst = os.path.join(article_object.save_path, fname)
|
||||
if os.path.exists(dst):
|
||||
fname = make_path_unique(fname)
|
||||
dst = os.path.join(article_object.save_path, fname)
|
||||
|
||||
|
||||
if url[-4:] == ".pdf": # calling the ususal pdf generation would not yield a nice pdf, just download it directly
|
||||
@@ -137,7 +135,6 @@ class PDFDownloader:
|
||||
|
||||
def create_tmp_profile(self, full_profile_path: Path = Path(config["browser_profile_path"])) -> Path:
|
||||
reduced_profile_path = Path(f"/tmp/firefox_profile_{uuid.uuid4()}")
|
||||
print(reduced_profile_path, full_profile_path)
|
||||
os.mkdir(reduced_profile_path)
|
||||
# copy needed directories
|
||||
dirs = ["extensions", "storage"]
|
||||
@@ -150,13 +147,20 @@ class PDFDownloader:
|
||||
shutil.copy(full_profile_path / f, reduced_profile_path)
|
||||
|
||||
folder_size = round(sum(p.stat().st_size for p in Path(reduced_profile_path).rglob('*')) / 1024 / 1024, 3)
|
||||
self.logger.info(f"Generated temporary profile with size {folder_size} MB")
|
||||
self.logger.info(f"Generated temporary profile at {reduced_profile_path} with size {folder_size} MB")
|
||||
return reduced_profile_path
|
||||
|
||||
|
||||
|
||||
|
||||
def make_path_unique(path):
    """Return ``path`` with a day/time stamp inserted before its extension.

    The stamp is the current local time formatted as ``%d-%H%M%S`` and is
    appended directly to the part of ``path`` preceding the extension.
    """
    stem, extension = os.path.splitext(path)
    stamp = datetime.datetime.now().strftime("%d-%H%M%S")
    return f"{stem}{stamp}{extension}"
|
||||
def ensure_unique(path, fname):
    """Return a file name that does not yet exist inside ``path``.

    ``fname`` is returned unchanged when ``path/fname`` does not exist.
    Otherwise `` -- fetch N`` is inserted before the extension, with N
    counting up from 1 until an unused name is found.
    """
    stem, extension = os.path.splitext(fname)

    candidate = fname
    attempt = 0
    while os.path.exists(os.path.join(path, candidate)):
        attempt += 1
        candidate = f"{stem} -- fetch {attempt}{extension}"

    return candidate
|
||||
|
@@ -3,7 +3,7 @@ from .download.browser import PDFDownloader
|
||||
from .download.youtube import YouTubeDownloader
|
||||
from .fetch.runner import get_description
|
||||
from .upload.runner import upload_to_archive as run_upload
|
||||
from .compress.runner import shrink_pdf
|
||||
|
||||
|
||||
import time
|
||||
import logging
|
||||
@@ -53,14 +53,3 @@ class UploadWorker(TemplateWorker):
|
||||
|
||||
super()._handle_article(article_watcher, action)
|
||||
# article_watcher.upload_completed = True
|
||||
|
||||
|
||||
|
||||
class CompressWorker(TemplateWorker):
    """Worker whose per-article action is ``shrink_pdf``."""

    def __init__(self) -> None:
        super().__init__()

    def _handle_article(self, article_watcher):
        """Delegate to the base handler, passing ``shrink_pdf`` as the action."""
        super()._handle_article(article_watcher, shrink_pdf)
|
Reference in New Issue
Block a user