Better structure
This commit is contained in: parent 0a6dde8c78, commit 0d76bcbb98
.gitignore (vendored): 1 line changed
@@ -2,3 +2,4 @@
*.pyc
*.log
__pycache__/
README.md: 30 lines changed
@@ -6,26 +6,25 @@ A utility to fetch article requests from slack and generate pdfs for them, fully

## Running

### How to run - auto archiving mode

In this mode the program is launched as a Docker container in headless mode. For persistence, a local storage volume is required, but that's it!

`docker run -it -v <your storage>:/app/file_storage/ auto_news`
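For a concrete invocation, you could back the storage with a named Docker volume; the volume name `auto_news_storage` below is only an illustration and not part of this repository:

`docker volume create auto_news_storage  # illustrative volume name`

`docker run -it -v auto_news_storage:/app/file_storage/ auto_news`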
You can specify additional parameters:

`docker run -it -v <your storage>:/app/file_storage/ auto_news debug` runs with debug values (does not write to the prod db, does not send mails)

`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on past uploads to archive.

`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. Be aware that it requires additional parameters in order to open GUIs on the host.

`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on incomplete uploads to archive.

`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. The additional parameters are required in order to open GUIs on the host.
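If the check window still does not appear on the host, a common extra step on X11 systems is to let local containers connect to the X server; this depends on your host setup and is not part of this repository:

`xhost +local:  # host-side X11 access, not part of this repo`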
### How to run - development mode

In this mode, a docker container is launched with an additional volume, the local code. You can test your code without needing to rebuild the image.

`docker run -it -v <your storage>:/app/file_storage/ -v <your code>:/code/ --entrypoint /bin/bash auto_news`

You are dropped into a bash shell, in which you can navigate to the `/code` directory and test live.
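Inside that shell, the live-testing loop might look like the following; the entry script name `runner.py` is an assumption and is not stated explicitly in this hunk:

`cd /code && python runner.py debug  # script name assumed`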
% ### How to run - file checker mode

% This mode requires the most access rights. You want to access all files and open gui programs.

% `docker run -it -e DISPLAY=":0" --network host -v $XAUTHORITY:/root/.Xauthority -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code auto_news /bin/bash`

% Similarly to the development mode, you can cd into code and run your checking duties.
## Building
@@ -41,6 +40,17 @@ where the `Dockerfile` has to be in the working directory
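The build command itself sits just above this hunk and is not shown in the diff; a typical invocation consistent with the image name used throughout this README would be:

`docker build -t auto_news .  # image tag assumed from this README`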
## Cheat-sheet Remy:

docker run -it -e LIVECODE=TRUE -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code/ auto_news /bin/bash

`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news`

docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news

`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/auto_news/app:/code --entrypoint /bin/bash auto_news`

`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check`

## Roadmap:

- [ ] automatically upload files to NAS
- [ ] handle paywalled sites like faz, spiegel, ... through their dedicated edu-sites

...
@@ -7,7 +7,7 @@ logger = logging.getLogger(__name__)

from utils_mail import runner as mail_runner
from utils_slack import runner as slack_runner
from utils.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker
from utils_worker.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker


class ArticleWatcher:

@@ -174,7 +174,8 @@ if __name__ == "__main__":
        logger.info(f"Launching upload to archive for {len(urls)} urls.")
        coordinator.manual_processing(urls, [UploadWorker()])
    elif "check" in sys.argv:
        logger.info("Not implemented yet.")
        from utils_check import runner as check_runner
        check_runner.verify_unchecked()
    else: # launch with full action
        kwargs = {
            "worker_download" : DownloadWorker(),
app/utils_check/runner.py (new file, 285 lines)
@@ -0,0 +1,285 @@
from rich.console import Console
from rich.table import Table
from rich.columns import Columns
from rich.rule import Rule
console = Console()
hline = Rule(style="white")

import os
import subprocess
from slack_sdk import WebClient
import configuration
models = configuration.models

# keyboard options presented for every unverified article
u_options = {
    "ENTER" : "Accept PDF as is. It gets marked as verified",
    "D" : "set language to DE and set verified",
    "E" : "set language to EN and set verified",
    "O" : "set other language (prompted)",
    "R" : "set related files (prompted multiple times)",
    "B" : "reject and move to folder BAD",
    "L" : "leave file as is, do not send reaction"
}


bot_client = WebClient(
    token = configuration.parsed["SLACK"]["auth_token"]
)


def file_overview(file_url: str, file_attributes: list, options: dict) -> None:
    """Prints a neat overview of the current article"""
    file_table = Table(
        title = file_url,
        row_styles = ["white", "bright_black"],
        min_width = 150
    )

    file_table.add_column("Attribute", justify = "right", no_wrap = True)
    file_table.add_column("Value set by auto_news")
    file_table.add_column("Status", justify = "right")
    for attr in file_attributes:
        file_table.add_row(attr["name"], attr["value"], attr["status"])

    option_key = "\n".join([f"[[bold]{k}[/bold]]" for k in options.keys()])
    option_action = "\n".join([f"[italic]{k}[/italic]" for k in options.values()])
    columns = Columns([option_key, option_action])

    console.print(file_table)
    console.print("Your options:")
    console.print(columns)


def send_reaction_to_slack_thread(article, reaction):
    """Sends the verification status as a reaction to the associated slack thread. This will significantly decrease load times of the bot"""
    messages = models.Message.select().where(models.Message.text.contains(article.article_url))
    # TODO rewrite this shit
    if len(messages) > 5:
        print("Found more than 5 messages. Aborting reactions...")
        return
    for m in messages:
        if not m.has_single_url:
            print("Found thread but won't send reaction because thread has multiple urls")
        else:
            ts = m.slack_ts
            bot_client.reactions_add(
                channel=configuration.parsed["SLACK"]["archive_id"],
                name=reaction,
                timestamp=ts
            )
            print("Sent reaction to message")


def prompt_language(query):
    """Asks for a two-letter language code and stores it on the article."""
    not_set = True
    while not_set:
        uin = input("Set language (nation-code, 2 letters) ")
        if len(uin) != 2:
            print("Bad code, try again")
        else:
            not_set = False
            query.language = uin
            query.save()


def prompt_related(query):
    """Collects additional file names for the article until '1' is entered."""
    file_list = []
    finished = False
    while not finished:
        uin = input("Additional file for article? Type '1' to cancel ")
        if uin == "1":
            query.set_related(file_list)
            finished = True
        else:
            file_list.append(uin)


def prompt_new_fname(query):
    """Asks for a new file name, removes the old file and marks the article as verified."""
    uin = input("New fname? ")
    old_fname = query.file_name
    query.file_name = uin
    query.verified = 1
    if old_fname != "":
        os.remove(query.save_path + old_fname)
    query.save()


def reject_article(article):
    """Marks the article as bad (verified = -1)."""
    article.verified = -1
    article.save()
    print("Article marked as bad")
    # also update the threads to not be monitored anymore
    send_reaction_to_slack_thread(article, "x")


def unreject_article(query):
    """Marks a previously rejected article as verified again."""
    query.verified = 1
    query.save()
    # os.rename(badpdf, fname)
    print("File set to verified")


def accept_article(article, last_accepted):
    """Marks the article as verified and reacts to the slack thread with a check mark."""
    article.verified = 1
    article.save()
    print("Article accepted as GOOD")

    # also update the threads to not be monitored anymore
    send_reaction_to_slack_thread(article, "white_check_mark")

    """linked = None
    try:
        thread = message_models.Thread.get(id = last_accepted.id + 1)
        rel = message_models.get_referenced_articles(thread, article_models.ArticleDownload)
        assert len(rel) == 1 and rel[0] == article
        linked = thread
    except: # if the above, naive method (just increment by one), fails, resort to brute search.
        print("Bruteforcing search")
        for t in message_models.Thread.select():
            rel = message_models.get_referenced_articles(t, article_models.ArticleDownload)
            if len(rel) == 1 and rel[0] == article:
                linked = t
                break

    if linked:
        linked.initiator_message.is_processed_override = 1
        linked.initiator_message.save()
        print("Message overwritten to PROCESSED")
    else:
        print("No matching thread found")"""
    return "" # linked


def verify_unchecked():
    """Iterates over all unverified articles, opens each PDF in evince and prompts for a decision."""
    query = models.ArticleDownload.select().where(models.ArticleDownload.verified == 0).execute()
    last_linked = None

    for article in query:
        console.print(hline)
        core_info = []
        for e, name in zip([article.save_path, article.file_name, article.title, article.language], ["Save path", "File name", "Title", "Language"]):
            entry = {
                "status" : "[red]██[/red]" if (len(e) == 0 or e == -1) else "[green]██[/green]",
                "value" : e if len(e) != 0 else "not set",
                "name" : name
            }
            core_info.append(entry)

        try:
            # suppress evince gtk warnings
            subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except Exception as e:
            print(str(list((-1, e))))
            continue

        file_overview(
            file_url = article.article_url,
            file_attributes = core_info,
            options = u_options
        )

        proceed = False
        while not proceed:
            uin = input("Choice ?").lower()
            if uin == "":
                last_linked = accept_article(article, last_linked) # last linked accelerates the whole process
                proceed = True
            elif uin == "d":
                article.language = "de"
                article.verified = 1
                article.save()
                proceed = True
            elif uin == "e":
                article.language = "en"
                article.verified = 1
                article.save()
                proceed = True
            elif uin == "o":
                prompt_language(article)
            elif uin == "r":
                prompt_related(article)
            elif uin == "b":
                reject_article(article)
                proceed = True
            elif uin == "l":
                # do nothing
                proceed = True
            else:
                print("Invalid input")


# def verify_bad():
#     b_options = {
#         "ENTER": "Accept pdf as fixed",
#         "B": "Keep pdf in BAD.",
#         "R": "set related files (prompted multiple times)",
#         "C": "Change the saved file-name and set as verified."
#     }
#     query = article_models.ArticleDownload.select().where(article_models.ArticleDownload.verified == -1).execute()

#     for q in query:
#         pdf = q.file_name
#         save_dir = get_save_path(q)
#         fname = save_dir + "BAD/" + pdf
#         try:
#             subprocess.call(["xdg-open", fname])
#         except:
#             print(f"[{testvar}██{testvar}] PDF moved:")
#             print(fname)
#             continue

#         status_pdf = f"{testvar}██{testvar}"
#         if "just a moment" in pdf:
#             status_pdf = f"{testvar}██{testvar}"

#         language = q.language
#         status_language = f"{testvar}██{testvar}"
#         if len(language) == 0:
#             status_language = f"{testvar}██{testvar}"


#         print_status_options(
#             status=u_status.format(
#                 url = q.article_url,
#                 status_pdf = status_pdf,
#                 pdf = pdf[:80],
#                 status_language = status_language,
#                 language = language
#             ),
#             options = b_options)


#         proceed = False
#         while not proceed:
#             uin = input("Choice? ").lower()
#             if uin == "":
#                 unreject_article(q)
#                 proceed = True
#             elif uin == "b":
#                 proceed = True
#             elif uin == "r":
#                 prompt_related(q)
#             elif uin == "c":
#                 prompt_new_fname(q)
#                 proceed = True
#             else:
#                 print("Invalid input")
@@ -31,10 +31,3 @@ def shrink_pdf(article):
        logger.error(f"Could not run the compression! {c.stderr.decode()} - {c.stdout.decode()}")

    return article



# gs -sDEVICE=pdfwrite -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH -sOutputFile=out.pdf
# ; mv -f temp.pdf file.pdf
@@ -29,7 +29,7 @@ class TemplateWorker(Thread):
            time.sleep(5)
        else:
            article_watcher = self._article_queue.pop(0)
            self.logger.info(f"{self.__class__.__name__} is now processing an article")
            self.logger.info(f"{self.__class__.__name__} is now processing article ({len(self._article_queue)} in queue)")
            self._handle_article(article_watcher)
@@ -1,38 +0,0 @@
import logging
import keys
from peewee import SqliteDatabase

from persistence import article_models
from archiving_utils import runner as archive_runner
from mail_utils import runner as mail_runner

# Global logger setup:
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger("MailThread")


# Constant values...
DOWNLOADS_DB = "/app/file_storage/downloads.db"


# DB Setup:
article_models.set_db(SqliteDatabase(
    DOWNLOADS_DB,
    pragmas = {'journal_mode': 'wal'} # multiple threads can access at once
))


mail_worker = mail_runner.MailSender(keys.MAIL_UNAME, keys.MAIL_PASSWORD, keys.MAIL_SENDER, keys.MAIL_RECIPIENT)
dl_worker = archive_runner.ArchivingThread(article_models, mail_worker)
dl_worker.start()



# Retroactively sends a message to DIRK for messages that were archived using slack, but when the mail-reply was not yet implemented


url_list = []

for url in url_list:
    dl_worker.get_or_save(url)