From 8f3ea25662d501e89b2912dee2f5cdd616f9ddde Mon Sep 17 00:00:00 2001
From: Remy Moll
Date: Wed, 20 Apr 2022 16:49:55 +0200
Subject: [PATCH] Many bug fixes

---
 Dockerfile                                    | 20 +++--
 README.md                                     |  8 +-
 app/configuration.py                          | 10 ++-
 app/runner.py                                 | 61 ++++++---------
 app/utils_slack/message_helpers.py            |  1 +
 app/utils_slack/runner.py                     |  2 +-
 app/utils_storage/migrations/migration.001.py | 67 ++++++++++++++++
 app/utils_storage/models.py                   | 51 ++++++------
 app/utils_worker/compress/runner.py           |  3 +
 app/utils_worker/download/browser.py          | 16 ++--
 app/utils_worker/download/youtube.py          | 78 +++++++++++++------
 app/utils_worker/fetch/runner.py              | 12 ++-
 app/utils_worker/upload/runner.py             |  4 +-
 app/utils_worker/worker_template.py           |  2 -
 app/utils_worker/workers.py                   |  4 +-
 requirements.txt                              |  2 +-
 16 files changed, 223 insertions(+), 118 deletions(-)
 create mode 100644 app/utils_storage/migrations/migration.001.py

diff --git a/Dockerfile b/Dockerfile
index d4d0203..093f2f1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,18 +1,22 @@
-FROM ubuntu:latest
-# UGH, timezone issues
-RUN ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
+FROM python:latest
 
-RUN apt-get update && apt-get install -y evince libcanberra-gtk-module && apt-get install -y xauth wget tar python3 python3-pip python3-setuptools python3-wheel python3-dev build-essential firefox ghostscript
+RUN apt-get update && apt-get install -y \
+evince libcanberra-gtk-module \
+# for checking
+xauth wget tar firefox \
+# for geckodriver + gui
+ghostscript
+# for compression
 
 # Download gecko (firefox) driver for selenium
-RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.30.0/geckodriver-v0.30.0-linux64.tar.gz
-RUN tar -x geckodriver -zf geckodriver-v0.30.0-linux64.tar.gz -O > /usr/bin/geckodriver
+RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.31.0/geckodriver-v0.31.0-linux64.tar.gz
+RUN tar -x geckodriver -zf geckodriver-v0.31.0-linux64.tar.gz -O > /usr/bin/geckodriver
 RUN chmod +x /usr/bin/geckodriver
-RUN rm geckodriver-v0.30.0-linux64.tar.gz
+RUN rm geckodriver-v0.31.0-linux64.tar.gz
 
 RUN echo "127.0.0.1 localhost" >> /etc/hosts
 
 COPY requirements.txt /app/
-RUN python3 -m pip install --upgrade pip && python3 -m pip install -r /app/requirements.txt
+RUN python3 -m pip install -r /app/requirements.txt
 
 RUN mkdir -p /app/auto_news
 COPY app /app/auto_news
diff --git a/README.md b/README.md
index 9970347..3bb1884 100644
--- a/README.md
+++ b/README.md
@@ -40,9 +40,9 @@ where the `Dockerfile` has to be in the working directory
 
 ## Cheat-sheet Remy:
 
-`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news`
+`docker run -it -v /mnt/Data/COSS/CONTAINERDATA/:/app/file_storage/ auto_news`
 
-`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/auto_news/app:/code --entrypoint /bin/bash auto_news`
+`docker run -it -v /mnt/Data/COSS/CONTAINERDATA/:/app/file_storage/ -v /mnt/Data/COSS/auto_news/app:/code --entrypoint /bin/bash auto_news`
 
 `docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check`
 
@@ -51,6 +51,6 @@
 ## Roadmap:
 
-[] automatically upload files to NAS
-[] handle paywalled sites like faz, spiegel, .. through their dedicated edu-sites
+[ ] automatically upload files to NAS
+[ ] handle paywalled sites like faz, spiegel, .. through their dedicated edu-sites
 ...
\ No newline at end of file
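A quick way to sanity-check the Firefox plus geckodriver stack the Dockerfile above installs is a headless selenium round trip from inside the container. This is a hedged sketch, not part of the patch: the target URL is a placeholder and headless mode is an assumption (the real check mode drives a visible browser over X11, as the third cheat-sheet command shows).

import logging
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

logging.basicConfig(level=logging.INFO)
options = Options()
options.headless = True  # no X server needed for a smoke test
driver = webdriver.Firefox(options=options)  # resolves /usr/bin/geckodriver via PATH
driver.get("https://example.org")
logging.info("Loaded page with title %r", driver.title)
driver.quit()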
diff --git a/app/configuration.py b/app/configuration.py
index 25bcd82..0981f26 100644
--- a/app/configuration.py
+++ b/app/configuration.py
@@ -23,20 +23,24 @@ if "debug" in sys.argv:
     logger.warning("Running in debugging mode because launched with argument 'debug'")
     # parsed.read("/code/config.ini")
 
-    db_path = os.path.join(parsed["DATABASE"]["db_path_dev"], parsed["DATABASE"]["db_name"])
+    db_base_path = parsed["DATABASE"]["db_path_dev"]
     parsed["SLACK"]["archive_id"] = parsed["SLACK"]["debug_id"]
     parsed["MAIL"]["recipient"] = parsed["MAIL"]["sender"]
 else:
     logger.warning("Using production values, I hope you know what you're doing...")
-    db_path = os.path.join(parsed["DATABASE"]["db_path_prod"], parsed["DATABASE"]["db_name"])
+    db_base_path = parsed["DATABASE"]["db_path_prod"]
 
 from utils_storage import models
 
 # Set up the database
 models.set_db(
     SqliteDatabase(
-        db_path,
+        os.path.join(db_base_path, parsed["DATABASE"]["chat_db_name"]),
+        pragmas = {'journal_mode': 'wal'} # multiple threads can read at once
+    ),
+    SqliteDatabase(
+        os.path.join(db_base_path, parsed["DATABASE"]["download_db_name"]),
         pragmas = {'journal_mode': 'wal'} # multiple threads can read at once
     )
 )
\ No newline at end of file
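The two-argument set_db() call above works because utils_storage.models declares DatabaseProxy placeholders and binds concrete databases to them at runtime (the full model changes follow in the models.py diff below). A condensed sketch of the pattern, with names shortened for illustration:

from peewee import DatabaseProxy, SqliteDatabase, Model, CharField

chat_db = DatabaseProxy()  # placeholder until the config has been parsed

class ChatBaseModel(Model):
    class Meta:
        database = chat_db  # models reference the proxy, not a concrete db

class User(ChatBaseModel):
    user_id = CharField(default='', unique=True)

# at startup, once the paths are known:
chat_db.initialize(SqliteDatabase("messages.db", pragmas={"journal_mode": "wal"}))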
diff --git a/app/runner.py b/app/runner.py
index 82c4c72..ed96736 100644
--- a/app/runner.py
+++ b/app/runner.py
@@ -27,8 +27,14 @@ class ArticleWatcher:
         self._fetch_completed = self._download_completed = self._compression_completed = self._upload_completed = False
 
         # first step: gather metadata
-        self.fetch.process(self) # this will call the update_status method
-        self.upload.process(self) # idependdent from the rest
+        if self.fetch and self.upload:
+            self.fetch.process(self) # this will call the update_status method
+            self.upload.process(self) # independent from the rest
+        else: # the full kwargs were not provided, only do a manual run
+            # overwrite update_status() because calls from the workers would result in errors
+            self.update_status = lambda completed: logger.info(f"Completed action {completed}")
+            for w in kwargs.get("workers_manual"):
+                w.process(self)
 
     def update_status(self, completed_action):
         """
@@ -36,23 +42,6 @@
         Article download is complete iff fetch and download were successful and compression was run
         """
         # if self.completition_notified and self._compression_completed and self._fetch_completed and self._download_completed and self._upload_completed, we are done - we don't need to delete self though, because it is then automatically garbage-collected
-        all_done = self._fetch_completed and self._download_completed and self._compression_completed and self._upload_completed
-        # if self._fetch_completed and not self._download_called:
-        #     self._download_called = True
-        #     self.download.process(self)
-        # elif self._download_completed and not self._compression_called:
-        #     self._compression_called = True
-        #     self.compress.process(self)
-        # elif self._compression_completed: # last step
-        #     self.completition_notifier(self.article)
-        #     # triggers action in Coordinator
-        # elif self._upload_completed:
-        #     # this case occurs when upload was faster than compression
-        #     pass
-        # else:
-        #     logger.warning(f"update_status called with unusual configuration {self._fetch_completed},{self._download_completed},{self._compression_completed}")
-
         if completed_action == "fetch":
             self.download.process(self)
         elif completed_action == "download":
@@ -129,15 +118,16 @@ class Coordinator:
 
 
     def incoming_request(self, message):
-        # TODO CHECK ME!
        """This method is passed onto the slack worker. It gets triggered when a new message is received."""
         url = message.urls[0] # ignore all the other ones
         a, is_new = models.ArticleDownload.get_or_create(article_url=url)
         message.thread.article = a
         message.thread.save()
+        self.kwargs.update({"notifier" : self.article_complete_notifier})
 
-        if is_new:
-            self.kwargs.update({"notifier" : self.article_complete_notifier})
+        if is_new or (a.file_name == "" and a.verified == 0):
+            # check for models that were created but were abandoned. This means they have missing information, most importantly no associated file
+            # this overwrites previously set information, but that should not be too important
             ArticleWatcher(
                 a,
                 **self.kwargs
             )
@@ -152,12 +142,13 @@
 
 
-    def manual_processing(self, url_list, target_calls):
-        for url in url_list:
-            article = models.ArticleDownload.get_or_none(article_url=url)
-            watcher = ArticleWatcher(article, self.article_complete_notifier)
-            for t in target_calls:
-                t.process(watcher)
+    def manual_processing(self, articles, workers):
+        for w in workers:
+            w.start()
+
+        for article in articles:
+            notifier = lambda article: print(f"Completed manual actions for {article}")
+            ArticleWatcher(article, workers_manual = workers, notifier = notifier)
 
     def article_complete_notifier(self, article):
         self.worker_slack.bot_worker.respond_channel_message(article)
@@ -170,12 +161,14 @@ if __name__ == "__main__":
 
     if "upload" in sys.argv:
-        urls = models.ArticleDownload.select(models.ArticleDownload.article_url).where(models.ArticleDownload.archive_url == "").execute()
-        logger.info(f"Launching upload to archive for {len(urls)} urls.")
-        coordinator.manual_processing(urls, [UploadWorker()])
+        articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute()
+        logger.info(f"Launching upload to archive for {len(articles)} articles.")
+        coordinator.manual_processing(articles, [UploadWorker()])
+
     elif "check" in sys.argv:
         from utils_check import runner as check_runner
         check_runner.verify_unchecked()
+
     else: # launch with full action
         kwargs = {
             "worker_download" : DownloadWorker(),
@@ -186,9 +179,3 @@ if __name__ == "__main__":
             "worker_mail" : mail_runner,
         }
         coordinator.add_workers(**kwargs)
-
-
-
-
-# TODO
-# Resume interrupted article models
\ No newline at end of file
diff --git a/app/utils_slack/message_helpers.py b/app/utils_slack/message_helpers.py
index 84cbc58..aeb71c1 100644
--- a/app/utils_slack/message_helpers.py
+++ b/app/utils_slack/message_helpers.py
@@ -189,6 +189,7 @@ def message_dict_to_model(message):
     uid = message.get("user", "BAD USER")
     if uid == "BAD USER":
         logger.critical("Message has no user?? {}".format(message))
+        return None
 
     user, _ = models.User.get_or_create(user_id = uid)
     thread, _ = models.Thread.get_or_create(thread_ts = thread_ts)
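The manual-run branch in ArticleWatcher above works because assigning to self.update_status shadows the class method on that single instance; callbacks from the manually started workers then hit the logging lambda instead of driving the fetch, download and compress chain. A stripped-down sketch of that mechanism (the class and names here are stand-ins, not project code):

class Watcher:
    def update_status(self, completed_action):
        print(f"driving the pipeline after {completed_action}")

w = Watcher()
w.update_status = lambda completed: print(f"Completed action {completed}")
w.update_status("fetch")          # instance attribute wins: only logs
Watcher().update_status("fetch")  # a fresh instance still drives the pipeline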
diff --git a/app/utils_slack/runner.py b/app/utils_slack/runner.py
index 92a88e3..2e20ea5 100644
--- a/app/utils_slack/runner.py
+++ b/app/utils_slack/runner.py
@@ -158,7 +158,7 @@ class BotApp(App):
         fully_processed = len([t for t in threads if t.is_fully_processed])
         fully_unprocessed = len([t for t in threads if t.message_count == 1])
         articles_unprocessed = len(models.ArticleDownload.select().where(models.ArticleDownload.verified < 1))
-        self.logger.info(f"[bold]STATUS[/bold]: Fully processed {all_threads}/{fully_processed} threads. {fully_unprocessed} threads have 0 replies. Article-objects to verify: {articles_unprocessed}", extra={"markup": True})
+        self.logger.info(f"[bold]STATUS[/bold]: Fully processed {fully_processed}/{all_threads} threads. {fully_unprocessed} threads have 0 replies. Article-objects to verify: {articles_unprocessed}", extra={"markup": True})
diff --git a/app/utils_storage/migrations/migration.001.py b/app/utils_storage/migrations/migration.001.py
new file mode 100644
index 0000000..bb5bb8c
--- /dev/null
+++ b/app/utils_storage/migrations/migration.001.py
@@ -0,0 +1,67 @@
+from playhouse.migrate import *
+
+
+"""
+This migration assumes that downloads.db kept the exact same structure as before.
+messages.db should drop the table articlemodelreference in favor of a new field article in the thread-table.
+Since each thread is constrained to exactly one article this makes the most sense.
+
+This migration assumes that messages.db gets a new field in the table thread:
+id | thread_ts | article_id
+
+We now need to migrate from the table articlemodelreference and then delete it.
+"""
+
+
+db = SqliteDatabase("/code/.dev/messages.db")
+migrator = SqliteMigrator(db)
+
+
+article_field = IntegerField(null=True)
+
+
+migrate(
+    migrator.add_column('thread', 'article_id', article_field),
+    # migrator.drop_column('some_table', 'old_column'),
+)
+
+
+
+# these are the old models, adapted to the migration
+
+class BaseModel(Model):
+    class Meta:
+        database = db
+
+class User(BaseModel):
+    user_id = CharField(default='', unique=True)
+
+class Thread(BaseModel):
+    """The threads that concern us are only created if the messages that contain urls"""
+    thread_ts = FloatField(default = 0)
+    article_id = IntegerField(null=True)
+
+
+class Message(BaseModel):
+    ts = FloatField(unique=True) #for sorting
+    channel_id = CharField(default='')
+    user = ForeignKeyField(User, backref="messages")
+    text = TextField(default='')
+    thread = ForeignKeyField(Thread, backref="messages", default=None)
+    file_type = CharField(default='')
+    perma_link = CharField(default='')
+    is_processed_override = BooleanField(default=False)
+
+
+class ArticleModelReference(BaseModel):
+    message = ForeignKeyField(Message, backref='article_model_references')
+    article_model_id = IntegerField(default = 0)
+
+
+
+
+for ref in ArticleModelReference.select():
+    ref.message.thread.article_id = ref.article_model_id
+    ref.message.thread.save()
+
+db.drop_tables((ArticleModelReference,)) # note the trailing comma: drop_tables expects an iterable of model classes
\ No newline at end of file
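One hardening worth considering for migration.001.py: playhouse applies each migrate() operation immediately, so a crash between the column add and the row copy would leave messages.db half-migrated. Wrapping all steps in a transaction is a small, safe extension. A hedged sketch under the same db and migrator definitions as above, not part of the patch (SQLite DDL participates in transactions, so a failure rolls the column add back too):

from peewee import IntegerField, SqliteDatabase
from playhouse.migrate import SqliteMigrator, migrate

db = SqliteDatabase("/code/.dev/messages.db")
migrator = SqliteMigrator(db)

with db.atomic():  # everything rolls back if any step raises
    migrate(migrator.add_column("thread", "article_id", IntegerField(null=True)))
    # ... copy the ArticleModelReference rows here, then drop the table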
{}".format(self.source_name, self.title) else: fname = "{} -- {}.pdf".format(self.source_name, self.title) @@ -155,23 +163,23 @@ class ArticleDownload(BaseModel): return True, {} -class ArticleKeyword(BaseModel): +class ArticleKeyword(DownloadBaseModel): # instance gets created for every one keyword -> flexible in size article = ForeignKeyField(ArticleDownload, backref='keywords') keyword = CharField() -class ArticleAuthor(BaseModel): +class ArticleAuthor(DownloadBaseModel): article = ForeignKeyField(ArticleDownload, backref='authors') author = CharField() -class ArticleReference(BaseModel): +class ArticleReference(DownloadBaseModel): article = ForeignKeyField(ArticleDownload, backref='references') reference_url = TextField(default = '') -class ArticleRelated(BaseModel): +class ArticleRelated(DownloadBaseModel): article = ForeignKeyField(ArticleDownload, backref='related') related_file_name = TextField(default = '') @@ -179,13 +187,13 @@ class ArticleRelated(BaseModel): ## == Slack-thread related models == ## -class User(BaseModel): +class User(ChatBaseModel): user_id = CharField(default='', unique=True) # messages -class Thread(BaseModel): - """The threads that concern us are only created if the messages that contain urls""" +class Thread(ChatBaseModel): + """The threads that concern us are only created if the base massage contains a url""" thread_ts = FloatField(default = 0) article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) # provides, ts, user, models @@ -227,7 +235,7 @@ class Thread(BaseModel): -class Message(BaseModel): +class Message(ChatBaseModel): ts = FloatField(unique=True) #for sorting channel_id = CharField(default='') user = ForeignKeyField(User, backref="messages") @@ -275,7 +283,7 @@ class Message(BaseModel): return len(self.urls) == 1 -class Reaction(BaseModel): +class Reaction(ChatBaseModel): type = CharField(default = "") message = ForeignKeyField(Message, backref="reaction") @@ -286,17 +294,16 @@ class Reaction(BaseModel): - - - - def create_tables(): - with db: - db.create_tables([ArticleDownload, ArticleKeyword, ArticleAuthor, ArticleReference, ArticleRelated, User, Message, Thread, Reaction]) + with download_db: + download_db.create_tables([ArticleDownload, ArticleKeyword, ArticleAuthor, ArticleReference, ArticleRelated]) + with chat_db: + chat_db.create_tables([User, Message, Thread, Reaction]) -def set_db(db_object): - db.initialize(db_object) +def set_db(chat_db_object, download_db_object): + chat_db.initialize(chat_db_object) + download_db.initialize(download_db_object) create_tables() def clear_path_name(path): diff --git a/app/utils_worker/compress/runner.py b/app/utils_worker/compress/runner.py index 5f67bb9..8a99fcb 100644 --- a/app/utils_worker/compress/runner.py +++ b/app/utils_worker/compress/runner.py @@ -9,6 +9,9 @@ shrink_sizes = [] def shrink_pdf(article): initial_size = os.path.getsize(article.save_path + article.file_name) + if article.file_name[-4:] != ".pdf": + return article # it probably was a youtube video + c = subprocess.run( ["gs", "-sDEVICE=pdfwrite", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dBATCH", f"-sOutputFile={config['default_download_path']}/compressed.pdf", f'"{article.save_path + article.file_name}"'], stdout=subprocess.PIPE, diff --git a/app/utils_worker/download/browser.py b/app/utils_worker/download/browser.py index 3af0b36..f1767b2 100644 --- a/app/utils_worker/download/browser.py +++ b/app/utils_worker/download/browser.py @@ -7,10 +7,10 @@ import requests from selenium import webdriver 
diff --git a/app/utils_worker/download/browser.py b/app/utils_worker/download/browser.py
index 3af0b36..f1767b2 100644
--- a/app/utils_worker/download/browser.py
+++ b/app/utils_worker/download/browser.py
@@ -7,10 +7,10 @@
 import requests
 from selenium import webdriver
 from selenium.webdriver.firefox.options import Options
 import configuration
+import json
 
 config = configuration.parsed["DOWNLOADS"]
-
-
+blacklisted = json.loads(config["blacklisted_href_domains"])
 
 class PDFDownloader:
     """Saves a given url. Fills the object it got as a parameter"""
@@ -61,10 +61,6 @@ class PDFDownloader:
             self.autostart()
         url = article_object.article_url
 
-        # arbitrary bug fixes:
-        if "focus.de" in url or "bloomberg.com" in url:
-            url = url.replace("https://", "https://outline.com/")
-            sleep_time += 5
         try:
             self.driver.get(url)
         except Exception as e:
@@ -97,7 +93,7 @@ class PDFDownloader:
 
         if success:
             article_object.file_name = fname
-            article_object.set_references = self.get_references()
+            article_object.set_references(self.get_references())
         else:
             article_object.file_name = ""
 
@@ -140,10 +136,12 @@ class PDFDownloader:
             hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")]
         except:
             hrefs = []
-        # TODO TEST THIS
+        old = hrefs
         hrefs = [h for h in hrefs \
-            if bool([(domain in h) for domain in config["blacklisted_href_domains"]])
+            if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0)
             ] # filter a tiny bit at least
+        diff = set(old) ^ set(hrefs)
+        self.logger.info(f"Removed {len(diff)} hrefs: {diff} (before:{len(old)}, after: {len(hrefs)})")
         return hrefs
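The sum()-based blacklist test above counts substring hits and negates the count. Python's built-in any() expresses the same check more directly and stops at the first hit; an equivalent sketch (the blacklist values are placeholders, the real list comes from config):

blacklisted = ["facebook.com", "doubleclick.net"]  # placeholder values
hrefs = ["https://example.org/a", "https://facebook.com/share?u=x"]
kept = [h for h in hrefs if not any(domain in h for domain in blacklisted)]
# kept == ["https://example.org/a"]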
downloading" - else: - article_object.file_name = "No streams available" - - return article_object + article_object.file_name = "" + + return article_object + + + +# class DummyArticle: +# article_url = "https://www.welt.de/politik/ausland/article238267261/Baerbock-Lieferung-gepanzerter-Fahrzeuge-an-die-Ukraine-kein-Tabu.html" +# save_path = "/app/file_storage/" +# fname_template = "www.youtube.com -- Test" +# file_name = "" + +# m = DummyArticle() +# t = YouTubeDownloader() +# t.save_video(m) + +# print(m.file_name) diff --git a/app/utils_worker/fetch/runner.py b/app/utils_worker/fetch/runner.py index 1fc227e..960a0f2 100644 --- a/app/utils_worker/fetch/runner.py +++ b/app/utils_worker/fetch/runner.py @@ -37,24 +37,28 @@ def get_description(article_object): except: news_article = fallback - if news_article.title: title = news_article.title else: title = fallback.title - if news_article.summary: summary = news_article.summary elif news_article.text: ind = min(500, len(news_article.text)) summary = news_article.text[:ind] + "..." else: - summary = fallback.summary + summary = fallback.summary + + if news_article.meta_lang: + lang = news_article.meta_lang + else: + lang = "" article_object.title = title article_object.summary = summary + article_object.language = lang article_object.set_authors(news_article.authors) article_object.set_keywords(news_article.keywords) - + return article_object diff --git a/app/utils_worker/upload/runner.py b/app/utils_worker/upload/runner.py index b8d188f..5542d16 100644 --- a/app/utils_worker/upload/runner.py +++ b/app/utils_worker/upload/runner.py @@ -9,10 +9,10 @@ def upload_to_archive(article_object): try: wayback = WaybackMachineSaveAPI(url, user_agent) archive_url = wayback.save() - logger.info(f"{url} uploaded to archive successfully") + # logger.info(f"{url} uploaded to archive successfully") article_object.archive_url = archive_url except Exception as e: article_object.archive_url = "Error while uploading: {}".format(e) - logger.error(f"Error while generating new url: {e}") + logger.error(f"Error while generating archive url: {e}") return article_object \ No newline at end of file diff --git a/app/utils_worker/worker_template.py b/app/utils_worker/worker_template.py index 96be787..a19a726 100644 --- a/app/utils_worker/worker_template.py +++ b/app/utils_worker/worker_template.py @@ -1,7 +1,6 @@ from threading import Thread import time import logging -# logger = logging.getLogger(__name__) class TemplateWorker(Thread): @@ -34,7 +33,6 @@ class TemplateWorker(Thread): def _handle_article(self, article_watcher, action=None): - # TODO Overload in children classes if action is None: self.logger.error("Unoverloaded call of _handle_article(). 
diff --git a/app/utils_worker/fetch/runner.py b/app/utils_worker/fetch/runner.py
index 1fc227e..960a0f2 100644
--- a/app/utils_worker/fetch/runner.py
+++ b/app/utils_worker/fetch/runner.py
@@ -37,24 +37,28 @@ def get_description(article_object):
     except:
         news_article = fallback
 
-
     if news_article.title:
         title = news_article.title
     else:
         title = fallback.title
 
-
     if news_article.summary:
         summary = news_article.summary
     elif news_article.text:
         ind = min(500, len(news_article.text))
         summary = news_article.text[:ind] + "..."
     else:
-        summary = fallback.summary
+        summary = fallback.summary
+
+    if news_article.meta_lang:
+        lang = news_article.meta_lang
+    else:
+        lang = ""
 
     article_object.title = title
     article_object.summary = summary
+    article_object.language = lang
     article_object.set_authors(news_article.authors)
     article_object.set_keywords(news_article.keywords)
-    
+
     return article_object
diff --git a/app/utils_worker/upload/runner.py b/app/utils_worker/upload/runner.py
index b8d188f..5542d16 100644
--- a/app/utils_worker/upload/runner.py
+++ b/app/utils_worker/upload/runner.py
@@ -9,10 +9,10 @@ def upload_to_archive(article_object):
     try:
         wayback = WaybackMachineSaveAPI(url, user_agent)
         archive_url = wayback.save()
-        logger.info(f"{url} uploaded to archive successfully")
+        # logger.info(f"{url} uploaded to archive successfully")
         article_object.archive_url = archive_url
     except Exception as e:
         article_object.archive_url = "Error while uploading: {}".format(e)
-        logger.error(f"Error while generating new url: {e}")
+        logger.error(f"Error while generating archive url: {e}")
 
     return article_object
\ No newline at end of file
diff --git a/app/utils_worker/worker_template.py b/app/utils_worker/worker_template.py
index 96be787..a19a726 100644
--- a/app/utils_worker/worker_template.py
+++ b/app/utils_worker/worker_template.py
@@ -1,7 +1,6 @@
 from threading import Thread
 import time
 import logging
-# logger = logging.getLogger(__name__)
 
 
 class TemplateWorker(Thread):
@@ -34,7 +33,6 @@ class TemplateWorker(Thread):
 
     def _handle_article(self, article_watcher, action=None):
-        # TODO Overload in children classes
         if action is None:
             self.logger.error("Unoverloaded call of _handle_article(). This should not occur in prod")
         else:
diff --git a/app/utils_worker/workers.py b/app/utils_worker/workers.py
index f29aab0..21b4388 100644
--- a/app/utils_worker/workers.py
+++ b/app/utils_worker/workers.py
@@ -1,6 +1,6 @@
 from .worker_template import TemplateWorker
 from .download.browser import PDFDownloader
-from .download.youtube import save_video
+from .download.youtube import YouTubeDownloader
 from .fetch.runner import get_description
 from .upload.runner import upload_to_archive as run_upload
 from .compress.runner import shrink_pdf
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
 class DownloadWorker(TemplateWorker):
     def __init__(self) -> None:
         self.dl_runner = PDFDownloader().download
-        self.yt_runner = save_video
+        self.yt_runner = YouTubeDownloader().save_video
         super().__init__()
 
     def _handle_article(self, article_watcher):
diff --git a/requirements.txt b/requirements.txt
index 50f3bca..5347fd9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 peewee
 selenium
-pytube
+youtube-dl
 waybackpy
 slack_bolt # relies on slack_sdk
 newspaper3k
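For orientation: the workers touched above (DownloadWorker and friends) all inherit from TemplateWorker, a Thread whose internals this patch barely changes. The following is a plausible minimal sketch of that queue-driven pattern, stated as an assumption since the full class body is not part of this diff:

import logging
from queue import Queue
from threading import Thread

class SketchWorker(Thread):
    """Pulls article watchers off a queue and applies one action to each."""

    def __init__(self, action):
        super().__init__(daemon=True)
        self.logger = logging.getLogger(__name__)
        self._queue = Queue()
        self._action = action

    def process(self, article_watcher):
        self._queue.put(article_watcher)  # called from the coordinator thread

    def run(self):
        while True:
            watcher = self._queue.get()  # blocks until work arrives
            watcher.article = self._action(watcher.article)
            watcher.update_status(self._action.__name__)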