# File viewer metadata (not part of the original source): 67 lines, 1.5 KiB, Python
"""
Runs the news_fetch pipeline against a manually curated list of URLs and saves the results locally.
"""
|
|
import json
import logging
import sys

# `runner` is imported by bare module name, so its directory has to be on the
# import path first. The path is relative and therefore resolved against the
# current working directory.
sys.path.append("../app/news_fetch")

import runner  # noqa: E402  (must come after the sys.path tweak above)
from rich.console import Console
from rich.table import Table

# Root logger, shared by the whole script.
logger = logging.getLogger()
# Single rich console used for the table output.
console = Console()
|
|
|
|
# Swap the configured database and download directory for local development
# locations under ../.dev/ (relative to the current working directory) before
# any pipeline code runs.
logger.info("Overwriting production values for single time media-fetch")
runner.configuration.models.set_db(
    runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
)
runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/"
|
|
|
|
|
|
def fetch():
    """Submit every URL listed in ``media_urls.json`` to the news_fetch dispatcher.

    Creates a ``runner.Dispatcher``, attaches a fetch worker and a download
    worker on the input side and a print worker on the output side, starts
    it, and then issues one request per URL. ``media_urls.json`` is opened
    relative to the current working directory; its contents are iterated
    directly, so it is expected to hold a collection of URL strings.
    """
    dispatcher = runner.Dispatcher()
    dispatcher.workers_in = [
        {"FetchWorker": runner.FetchWorker(), "DownloadWorker": runner.DownloadWorker()}
    ]
    dispatcher.workers_out = [{"PrintWorker": runner.PrintWorker()}]
    dispatcher.start()

    with open("media_urls.json", "r") as f:
        url_list = json.load(f)

    logger.info(f"Found {len(url_list)} media urls")
    for u in url_list:
        # Wrap the URL as "<url|preview text>"; this looks like Slack-style
        # link markup -- TODO confirm against what the dispatcher parses.
        msg_text = f"<{u}|dummy preview text>"
        # Fix: the original passed the undefined name `msg`, which raised
        # NameError on the first URL. `msg_text` is the only message built here.
        # NOTE(review): if `incoming_request` expects a richer message object
        # rather than raw text, wrap `msg_text` accordingly.
        dispatcher.incoming_request(msg_text)
|
|
|
|
|
|
|
|
def show():
    """Print the records from ``runner.models.ArticleDownload.select()`` as a table.

    One column is shown per attribute in ``columns``. The ``authors``
    attribute is iterated and reduced to the string form of the list of each
    item's ``.author`` value. Every row is also echoed with ``print`` before
    being added to the table.
    """
    columns = ["title", "article_url", "archive_url", "authors"]

    table = Table(
        title="ArticleDownloads",
        row_styles=["white", "bright_black"],
    )
    for column_name in columns:
        table.add_column(column_name, justify="right")

    for record in runner.models.ArticleDownload.select():
        # "authors" is the last column; everything before it is used as-is.
        *plain_cells, author_rows = [getattr(record, name) for name in columns]
        row = [*plain_cells, str([entry.author for entry in author_rows])]
        print(row)
        table.add_row(*row)

    console.print(table)
|
|
|
|
|
|
|
|
|
|
# Script entry: only the stored results are displayed by default. Uncomment
# fetch() to run the curated URLs through the pipeline first.
# fetch()
show()