Better structure
This commit is contained in:
parent
0a6dde8c78
commit
0d76bcbb98
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
|||||||
|
|
||||||
*.pyc
|
*.pyc
|
||||||
*.log
|
*.log
|
||||||
|
__pycache__/
|
30
README.md
30
README.md
@ -6,26 +6,25 @@ A utility to fetch article requests from slack and generate pdfs for them, fully
|
|||||||
## Running
|
## Running
|
||||||
### How to run - auto archiving mode
|
### How to run - auto archiving mode
|
||||||
In this mode the program is launched as a docker container, in a headless mode. For persistence purposes a local storage volume is required, but that's it!
|
In this mode the program is launched as a docker container, in a headless mode. For persistence purposes a local storage volume is required, but that's it!
|
||||||
|
|
||||||
`docker run -it -v <your storage>:/app/file_storage/ auto_news`
|
`docker run -it -v <your storage>:/app/file_storage/ auto_news`
|
||||||
|
|
||||||
You can specify additional parameters:
|
You can specify additional parameters:
|
||||||
|
|
||||||
`docker run -it -v <your storage>:/app/file_storage/ auto_news debug` runs with debug values (does not write to prod db, does not send mails)
|
`docker run -it -v <your storage>:/app/file_storage/ auto_news debug` runs with debug values (does not write to prod db, does not send mails)
|
||||||
`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on past uploads to archive.
|
|
||||||
`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. Be aware that it requires additional parameters in order to open guis on the host.
|
`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on incomplete uploads to archive.
|
||||||
|
|
||||||
|
`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. The additional parameters are required in order to open GUIs on the host.
|
||||||
|
|
||||||
|
|
||||||
### How to run - development mode
|
### How to run - development mode
|
||||||
In this mode, a docker container is launched with an additional volume, the local code. You can test your code without the need to rebuild the image.
|
In this mode, a docker container is launched with an additional volume, the local code. You can test your code without the need to rebuild the image.
|
||||||
|
|
||||||
`docker run -it -v <your storage>:/app/file_storage/ -v <your code>:/code/ --entrypoint /bin/bash auto_news`
|
`docker run -it -v <your storage>:/app/file_storage/ -v <your code>:/code/ --entrypoint /bin/bash auto_news`
|
||||||
You are dropped into a bash shell, in which you can navigate to the `/code` directory and then test live.
|
You are dropped into a bash shell, in which you can navigate to the `/code` directory and then test live.
|
||||||
|
|
||||||
|
|
||||||
% ### How to run - file checker mode
|
|
||||||
% This mode requires the most access rights. You want to access all files and open gui programs.
|
|
||||||
% `docker run -it -e DISPLAY=":0" --network host -v $XAUTHORITY:/root/.Xauthority -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code auto_news /bin/bash`
|
|
||||||
% Similarly to the development mode, you can cd into code and run your checking duties.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
@ -41,6 +40,17 @@ where the `Dockerfile` has to be in the working directory
|
|||||||
|
|
||||||
## Cheat-sheet Remy:
|
## Cheat-sheet Remy:
|
||||||
|
|
||||||
docker run -it -e LIVECODE=TRUE -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code/ auto_news /bin/bash
|
`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news`
|
||||||
|
|
||||||
docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news
|
`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/auto_news/app:/code --entrypoint /bin/bash auto_news`
|
||||||
|
|
||||||
|
|
||||||
|
`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Roadmap:
|
||||||
|
|
||||||
|
- [ ] automatically upload files to NAS
|
||||||
|
- [ ] handle paywalled sites like faz, spiegel, .. through their dedicated edu-sites
|
||||||
|
...
|
@ -7,7 +7,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
from utils_mail import runner as mail_runner
|
from utils_mail import runner as mail_runner
|
||||||
from utils_slack import runner as slack_runner
|
from utils_slack import runner as slack_runner
|
||||||
from utils.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker
|
from utils_worker.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker
|
||||||
|
|
||||||
|
|
||||||
class ArticleWatcher:
|
class ArticleWatcher:
|
||||||
@ -174,7 +174,8 @@ if __name__ == "__main__":
|
|||||||
logger.info(f"Launching upload to archive for {len(urls)} urls.")
|
logger.info(f"Launching upload to archive for {len(urls)} urls.")
|
||||||
coordinator.manual_processing(urls, [UploadWorker()])
|
coordinator.manual_processing(urls, [UploadWorker()])
|
||||||
elif "check" in sys.argv:
|
elif "check" in sys.argv:
|
||||||
logger.info("Not implemented yet.")
|
from utils_check import runner as check_runner
|
||||||
|
check_runner.verify_unchecked()
|
||||||
else: # launch with full action
|
else: # launch with full action
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"worker_download" : DownloadWorker(),
|
"worker_download" : DownloadWorker(),
|
||||||
|
285
app/utils_check/runner.py
Normal file
285
app/utils_check/runner.py
Normal file
@ -0,0 +1,285 @@
|
|||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.columns import Columns
|
||||||
|
from rich.rule import Rule
|
||||||
|
console = Console()
|
||||||
|
hline = Rule(style="white")
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from slack_sdk import WebClient
|
||||||
|
import configuration
|
||||||
|
models = configuration.models
|
||||||
|
|
||||||
|
# Menu shown to the reviewer for every unchecked article: key label -> description
# of the action. The labels are matched case-insensitively by verify_unchecked
# ("ENTER" stands for an empty input line).
u_options = {
    "ENTER": "Accept PDF as is. It gets marked as verified",
    "D": "set language to DE and set verified",
    "E": "set language to EN and set verified",
    "O": "set other language (prompted)",
    "R": "set related files (prompted multiple times)",
    "B": "reject and move to folder BAD",
    "L": "leave file as is, do not send reaction",
}
|
||||||
|
|
||||||
|
|
||||||
|
# Slack Web API client used to post reactions; authenticated with the token
# stored under the "SLACK" section of the parsed configuration.
bot_client = WebClient(token=configuration.parsed["SLACK"]["auth_token"])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def file_overview(file_url: str, file_attributes: list, options: dict) -> None:
    """Render a summary table for one article, followed by the action menu.

    Args:
        file_url: Text used as the title of the table.
        file_attributes: Dicts supplying the ``"name"``, ``"value"`` and
            ``"status"`` cells of each table row.
        options: Mapping of key label to action description. Labels are
            rendered bold inside square brackets, descriptions in italics,
            and both are handed to a rich ``Columns`` layout.
    """
    overview = Table(title=file_url, row_styles=["white", "bright_black"], min_width=150)
    overview.add_column("Attribute", justify="right", no_wrap=True)
    overview.add_column("Value set by auto_news")
    overview.add_column("Status", justify="right")
    for row in file_attributes:
        overview.add_row(row["name"], row["value"], row["status"])

    # Build the two menu columns in lockstep so labels and descriptions line up.
    key_lines = []
    action_lines = []
    for label, description in options.items():
        key_lines.append(f"[[bold]{label}[/bold]]")
        action_lines.append(f"[italic]{description}[/italic]")
    menu = Columns(["\n".join(key_lines), "\n".join(action_lines)])

    console.print(overview)
    console.print("Your options:")
    console.print(menu)
|
||||||
|
|
||||||
|
|
||||||
|
def send_reaction_to_slack_thread(article, reaction):
    """Add ``reaction`` to the Slack messages that mention the article's URL.

    Messages are looked up by a substring match of ``article.article_url``
    against the stored message text. If more than five messages match,
    nothing is sent. Messages whose ``has_single_url`` flag is false are
    skipped with a notice.

    Original author's note: sending the verification status as a reaction is
    expected to reduce the bot's load times (not verifiable from this file).

    Args:
        article: Record exposing ``article_url``.
        reaction: Slack emoji name to add (without colons).
    """
    # TODO: replace this text search with a proper article -> message relation.
    matches = models.Message.select().where(models.Message.text.contains(article.article_url))
    if len(matches) > 5:
        print("Found more than 5 messages. Aborting reactions...")
        return

    for message in matches:
        if message.has_single_url:
            bot_client.reactions_add(
                channel=configuration.parsed["SLACK"]["archive_id"],
                name=reaction,
                timestamp=message.slack_ts,
            )
            print("Sent reaction to message")
        else:
            print("Found thread but won't send reaction because thread has multiple urls")
|
||||||
|
|
||||||
|
def prompt_language(query):
    """Ask for a two-letter language code until a valid one is entered, then save it.

    The answer is stripped and lower-cased so it matches the ``"de"`` / ``"en"``
    values written by the D/E shortcuts of the checker. Only answers made of
    exactly two letters are accepted.

    Args:
        query: Record exposing a ``language`` attribute and a ``save()`` method.
    """
    while True:
        code = input("Set language (nation-code, 2 letters) ").strip().lower()
        if len(code) == 2 and code.isalpha():
            break
        print("Bad code, try again")

    query.language = code
    query.save()
|
||||||
|
|
||||||
|
|
||||||
|
def prompt_related(query):
    """Collect related file names from the user and store them on the record.

    Prompts repeatedly; every non-blank answer is collected. Entering ``1``
    ends the prompt and passes the collected list (possibly empty) to
    ``query.set_related``. Blank answers are ignored so that a stray ENTER
    does not register an empty file name.

    Args:
        query: Record exposing a ``set_related(list)`` method.
    """
    related_files = []
    while True:
        entry = input("Additional file for article? Type '1' to cancel ").strip()
        if entry == "1":
            break
        if entry:
            related_files.append(entry)

    query.set_related(related_files)
|
||||||
|
|
||||||
|
|
||||||
|
def prompt_new_fname(query):
    """Ask for a new file name, mark the record verified and drop the old file.

    The previously referenced file is removed from ``query.save_path`` only
    when the record had a file name and the new name differs from it;
    re-entering the same name must not delete the file the record still
    points to. A missing old file is reported but does not prevent saving.

    Args:
        query: Record exposing ``file_name``, ``save_path``, ``verified`` and
            a ``save()`` method.
    """
    new_name = input("New fname? ")
    old_name = query.file_name
    query.file_name = new_name
    query.verified = 1

    if old_name and old_name != new_name:
        # os.path.join works whether or not save_path ends with a separator.
        old_path = os.path.join(query.save_path, old_name)
        try:
            os.remove(old_path)
        except FileNotFoundError:
            print(f"Old file {old_path} was already gone")

    query.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def reject_article(article):
    """Mark an article as bad (``verified = -1``) and react with ``x`` on Slack.

    Args:
        article: Record exposing ``verified``, ``save()`` and the attributes
            read by ``send_reaction_to_slack_thread``.
    """
    article.verified = -1
    article.save()
    print("Article marked as bad")

    # The reaction tells the bot that the thread no longer needs monitoring.
    send_reaction_to_slack_thread(article, reaction="x")
|
||||||
|
|
||||||
|
|
||||||
|
def unreject_article(query):
    """Flag a record as verified (``verified = 1``) and persist it.

    Only the record is updated; no file is moved or renamed here.

    Args:
        query: Record exposing a ``verified`` attribute and a ``save()`` method.
    """
    verified_flag = 1
    query.verified = verified_flag
    query.save()
    print("File set to verified")
|
||||||
|
|
||||||
|
|
||||||
|
def accept_article(article, last_accepted):
    """Mark an article as verified and react with a check mark on Slack.

    Args:
        article: Record exposing ``verified``, ``save()`` and the attributes
            read by ``send_reaction_to_slack_thread``.
        last_accepted: Currently unused. Kept so existing callers, which pass
            the previous return value back in, keep working.

    Returns:
        Always the empty string. Linking the article to its originating
        thread is not implemented; the return value is a placeholder for it.
    """
    article.verified = 1
    article.save()
    print("Article accepted as GOOD")

    # The reaction tells the bot that the thread no longer needs monitoring.
    send_reaction_to_slack_thread(article, "white_check_mark")

    return ""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _attribute_row(name, value):
    """Build the overview-table row (name, displayed value, status) for one attribute."""
    # Attributes may be unset (None / empty) or carry the -1 marker; none of
    # these may be passed to len() blindly.
    empty = value is None or (hasattr(value, "__len__") and len(value) == 0)
    flagged = empty or value == -1
    return {
        "status": "[red]██[/red]" if flagged else "[green]██[/green]",
        "value": "not set" if empty else str(value),
        "name": name,
    }


def verify_unchecked():
    """Interactively review every article whose ``verified`` flag is 0.

    For each article the stored file is opened in ``evince``, an overview
    table and the action menu are printed, and the user is prompted until an
    action that finishes the article is chosen:

    * empty input - accept the article as is
    * ``d`` / ``e`` - set the language to ``de`` / ``en`` and accept
    * ``o`` - prompt for another language, then ask again
    * ``r`` - prompt for related files, then ask again
    * ``b`` - reject the article
    * ``l`` - leave the article untouched

    Accepting and rejecting also send a reaction to the matching Slack
    messages. If the viewer cannot be started, the error is printed and the
    article is skipped.
    """
    unchecked = models.ArticleDownload.select().where(models.ArticleDownload.verified == 0).execute()
    last_linked = None
    shortcut_languages = {"d": "de", "e": "en"}

    for article in unchecked:
        console.print(hline)
        core_info = [
            _attribute_row("Save path", article.save_path),
            _attribute_row("File name", article.file_name),
            _attribute_row("Title", article.title),
            _attribute_row("Language", article.language),
        ]

        try:
            # Discard the viewer's output (suppresses evince's GTK warnings).
            # DEVNULL instead of PIPE: nobody reads the pipes, so a chatty
            # viewer could otherwise block once a pipe buffer fills up.
            subprocess.Popen(
                ["evince", f"file://{os.path.join(article.save_path, article.file_name)}"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as err:
            # Best effort: an article whose file cannot be shown is skipped.
            print([-1, err])
            continue

        file_overview(
            file_url=article.article_url,
            file_attributes=core_info,
            options=u_options,
        )

        while True:
            choice = input("Choice ?").strip().lower()
            if choice == "":
                # last_linked is handed back in for the (not yet implemented) thread linking
                last_linked = accept_article(article, last_linked)
                break
            if choice in shortcut_languages:
                # Same as accepting, but with the language corrected first.
                # accept_article saves the record and sends the Slack reaction;
                # the menu describes only "L" as not sending one.
                article.language = shortcut_languages[choice]
                last_linked = accept_article(article, last_linked)
                break
            if choice == "o":
                prompt_language(article)
            elif choice == "r":
                prompt_related(article)
            elif choice == "b":
                reject_article(article)
                break
            elif choice == "l":
                # leave the article untouched
                break
            else:
                print("Invalid input")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# def verify_bad():
|
||||||
|
# b_options = {
|
||||||
|
# "ENTER":"Accept pdf as fixed",
|
||||||
|
# "B": "Keep pdf in BAD.",
|
||||||
|
# "R" : "set related files (prompted multiple times)",
|
||||||
|
# "C" : "Change the saved file-name and set as verified."
|
||||||
|
# }
|
||||||
|
# query = article_models.ArticleDownload.select().where(article_models.ArticleDownload.verified == -1).execute()
|
||||||
|
|
||||||
|
# for q in query:
|
||||||
|
# pdf = q.file_name
|
||||||
|
# save_dir = get_save_path(q)
|
||||||
|
# fname = save_dir + "BAD/" + pdf
|
||||||
|
# try:
|
||||||
|
# subprocess.call(["xdg-open", fname])
|
||||||
|
# except:
|
||||||
|
# print(f"[{testvar}██{testvar}] PDF moved:")
|
||||||
|
# print(fname)
|
||||||
|
# continue
|
||||||
|
|
||||||
|
# status_pdf = f"{testvar}██{testvar}"
|
||||||
|
# if "just a moment" in pdf:
|
||||||
|
# status_pdf = f"{testvar}██{testvar}"
|
||||||
|
|
||||||
|
# language = q.language
|
||||||
|
# status_language = f"{testvar}██{testvar}"
|
||||||
|
# if len(language) == 0:
|
||||||
|
# status_language = f"{testvar}██{testvar}"
|
||||||
|
|
||||||
|
|
||||||
|
# print_status_options(
|
||||||
|
# status=u_status.format(
|
||||||
|
# url = q.article_url,
|
||||||
|
# status_pdf = status_pdf,
|
||||||
|
# pdf = pdf[:80],
|
||||||
|
# status_language = status_language,
|
||||||
|
# language = language
|
||||||
|
# ),
|
||||||
|
# options = b_options)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# proceed = False
|
||||||
|
# while not proceed:
|
||||||
|
# proceed = False
|
||||||
|
# uin = input("Choice? ").lower()
|
||||||
|
# if uin == "":
|
||||||
|
# unreject_article(q)
|
||||||
|
# proceed = True
|
||||||
|
# elif uin == "b":
|
||||||
|
# proceed = True
|
||||||
|
# elif uin == "r":
|
||||||
|
# prompt_related(q)
|
||||||
|
# elif uin == "c":
|
||||||
|
# prompt_new_fname(q)
|
||||||
|
# proceed = True
|
||||||
|
# else:
|
||||||
|
# print("Invalid input")
|
||||||
|
|
@ -31,10 +31,3 @@ def shrink_pdf(article):
|
|||||||
logger.error(f"Could not run the compression! {c.stderr.decode()} - {c.stdout.decode()}")
|
logger.error(f"Could not run the compression! {c.stderr.decode()} - {c.stdout.decode()}")
|
||||||
|
|
||||||
return article
|
return article
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# gs -sDEVICE=pdfwrite -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH -sOutputFile=out.pdf
|
|
||||||
# ; mv -f temp.pdf file.pdf
|
|
@ -29,7 +29,7 @@ class TemplateWorker(Thread):
|
|||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
else:
|
else:
|
||||||
article_watcher = self._article_queue.pop(0)
|
article_watcher = self._article_queue.pop(0)
|
||||||
self.logger.info(f"{self.__class__.__name__} is now processing an article")
|
self.logger.info(f"{self.__class__.__name__} is now processing article ({len(self._article_queue)} in queue)")
|
||||||
self._handle_article(article_watcher)
|
self._handle_article(article_watcher)
|
||||||
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
|||||||
import logging
|
|
||||||
import keys
|
|
||||||
from peewee import SqliteDatabase
|
|
||||||
|
|
||||||
from persistence import article_models
|
|
||||||
from archiving_utils import runner as archive_runner
|
|
||||||
from mail_utils import runner as mail_runner
|
|
||||||
|
|
||||||
# Global logger setup:
|
|
||||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
|
|
||||||
logger = logging.getLogger("MailThread")
|
|
||||||
|
|
||||||
|
|
||||||
# Constant values...
|
|
||||||
DOWNLOADS_DB = "/app/file_storage/downloads.db"
|
|
||||||
|
|
||||||
|
|
||||||
# DB Setup:
|
|
||||||
article_models.set_db(SqliteDatabase(
|
|
||||||
DOWNLOADS_DB,
|
|
||||||
pragmas = {'journal_mode': 'wal'} # mutliple threads can access at once
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
mail_worker = mail_runner.MailSender(keys.MAIL_UNAME, keys.MAIL_PASSWORD, keys.MAIL_SENDER, keys.MAIL_RECIPIENT)
|
|
||||||
dl_worker = archive_runner.ArchivingThread(article_models, mail_worker)
|
|
||||||
dl_worker.start()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Retroactively sends a message to DIRK for messages that were archived using slack, but when the mail-reply was not yet implemented
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
url_list = []
|
|
||||||
|
|
||||||
for url in url_list:
|
|
||||||
dl_worker.get_or_save(url)
|
|
Loading…
x
Reference in New Issue
Block a user