181 lines
6.5 KiB
Python
181 lines
6.5 KiB
Python
import logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
from peewee import *
|
|
import os
|
|
import markdown
|
|
import configuration
|
|
import datetime
|
|
|
|
from . import helpers
|
|
# Sections of the main configuration that this module reads.
config = configuration.main_config["DOWNLOADS"]
slack_config = configuration.main_config["SLACK"]

# Size limit in bytes (15MB) that a file's on-disk size is compared against.
FILE_SIZE_THRESHOLD = 15 * 1024 * 1024

# Database placeholder: the concrete database is set at runtime.
download_db = DatabaseProxy()
|
|
|
|
|
|
class DownloadBaseModel(Model):
    """Base class that attaches its subclasses to the ``download_db`` proxy."""

    class Meta:
        """Model options: use the module-level database proxy."""

        database = download_db
|
|
|
|
|
|
|
|
## == Article related models == ##
|
|
class ArticleDownload(DownloadBaseModel):
    """A single article: its URL, fetched metadata, stored file and status flags.

    Authors and related files are not columns of this model; they are attached
    through the foreign keys of ``ArticleAuthor`` and ``ArticleRelated``.
    """

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')

    @property
    def is_title_bad(self):  # add incrementally
        """Return True when the title contains one of the known bad markers."""
        return "PUR-Abo" in self.title \
            or "Redirecting" in self.title \
            or "Error while running fetch" in self.title

    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')

    file_name = TextField(default='')

    @property
    def save_path(self):
        """Return the local directory (with trailing slash) for this article's file.

        The directory is built from the configured local storage path, the
        download year and the full month name of the download date.
        """
        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    def _nas_location(self, file_name):
        """Return the NAS display string for ``file_name``.

        Returns None when the article has no download date, because the
        year/month folders cannot be derived in that case.
        """
        if not self.download_date:
            return None
        return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{file_name}"

    @property
    def fname_nas(self):
        """Return the NAS display string of this article's own file, or None.

        A property getter only ever receives ``self``, so looking up the
        location of another file name goes through ``_nas_location``.
        """
        return self._nas_location(self.file_name)

    @property
    def fname_template(self):
        """Return a cleaned file name built from source name and title.

        The ``.pdf`` suffix is omitted when the source name refers to YouTube.
        """
        if "youtube.com" in self.source_name or "youtu.be" in self.source_name:
            fname = f"{self.source_name} -- {self.title}"
        else:
            fname = f"{self.source_name} -- {self.title}.pdf"
        return helpers.clear_path_name(fname)

    archive_url = TextField(default='')
    pub_date = DateField(default=datetime.date.fromtimestamp(0))
    download_date = DateField(default=datetime.date.today)

    slack_ts = FloatField(default=0)  # should be a fixed-length string but float is easier to sort by

    @property
    def slack_ts_full(self):
        """Return ``slack_ts`` as a string right-padded with zeros to 6 decimals."""
        str_ts = str(self.slack_ts)
        cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1)  # usually there are 6 decimals
        return f"{str_ts}{cut_zeros * '0'}"

    sent = BooleanField(default=False)

    # need to know who saved the message because the file needs to be on their
    # computer in order to get verified
    archived_by = CharField(default=os.getenv("UNAME"))

    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default='')
    verified = IntegerField(default=0)  # 0 = not verified, 1 = verified, -1 = marked as bad

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message
    # should be replied to. This attribute does not need to be saved in the db

    ## Helpers specific to a single article
    def __str__(self) -> str:
        """Return a short label: shortened title and source, or the URL as fallback."""
        if self.title != '' and self.source_name != '':
            desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}"
        else:
            desc = f"{self.article_url}"
        return f"ART [{desc}]"

    def mail_info(self):
        """Build the message body and attachment list for this article.

        Returns:
            None when ``file_status`` reports a missing file name (nothing
            should be sent). Otherwise a tuple ``(html, files)``: the message
            rendered from markdown and a list of local file paths to attach.
        """
        # displays the summary in a blockquote
        summary = "\n> " + "\n> ".join(self.summary.split("\n"))
        answer_text = f"[{self.article_url}]({self.article_url})\n\n"  # first the url
        answer_files = []

        status = self.file_status
        if status == 1:  # file_name was empty
            return None  # there has been an error do not send any message
        elif status == 2:  # no file found at specified location
            answer_text += f"*{self.title}*\n{summary}\nFilename: {self.file_name}"
        elif status == 3:  # file found but deemed too big
            location = f"File not sent directly. Location on NAS:\n`{self.fname_nas}`"
            answer_text += f"*{self.title}*\n{summary}\n{location}"
        else:  # everything nominal
            answer_text += f"*{self.title}*\n{summary}"
            answer_files.append(self.save_path + self.file_name)

        # then the related files; materialise the backref once so it is not
        # evaluated a second time for the loop
        related = list(self.related)
        if related:
            rel_text = "Related files on NAS:"
            for r in related:
                rel_text += f"\n• `{self._nas_location(r.related_file_name)}` "
            answer_text += "\n\n" + rel_text

        return markdown.markdown(answer_text), answer_files

    def set_authors(self, authors):
        """Create one ``ArticleAuthor`` row per author name shorter than 100 characters."""
        for a in authors:
            if len(a) < 100:  # otherwise it's a mismatched string
                ArticleAuthor.create(
                    article=self,
                    author=a,
                )

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per related file name.

        Raises:
            ValueError: when a name is longer than 255 characters. Rows for
                the names preceding it have already been created.
        """
        for r in related:
            if len(r) > 255:
                raise ValueError("Related file name too long for POSTGRES")

            ArticleRelated.create(
                article=self,
                related_file_name=r,
            )

    @property
    def file_status(self):
        """0 = file exists, 1 = no file name!, 2 = file does not exist, 3 = file exists but is too large"""
        if not self.file_name:
            logger.error(f"Article {self} has no filename!")
            return 1

        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
            return 2

        # files without a .pdf extension get the same status as oversized files
        if (os.path.splitext(file_path_abs)[1] != ".pdf") or (os.path.getsize(file_path_abs) > FILE_SIZE_THRESHOLD):
            logger.warning(f"Article {self} has a file that exceeds the file size limit.")
            return 3

        return 0
|
|
|
|
|
|
|
|
class ArticleAuthor(DownloadBaseModel):
    """One author name linked to an article (reachable as ``article.authors``)."""

    article = ForeignKeyField(ArticleDownload, backref="authors")
    author = CharField()
|
|
|
|
|
|
class ArticleRelated(DownloadBaseModel):
    """One related file linked to an article (reachable as ``article.related``).

    Related files are, for example, the full text of a paper or audio files.
    """

    article = ForeignKeyField(ArticleDownload, backref="related")
    related_file_name = TextField(default="")
|
|
|
|
|
|
|
|
|
|
|
|
def set_db(download_db_object):
    """Initialize the ``download_db`` proxy with a database and create the tables.

    Args:
        download_db_object: the database object the proxy is initialized with.
    """
    download_db.initialize(download_db_object)
    models = [ArticleDownload, ArticleAuthor, ArticleRelated]
    # create tables (does nothing if they exist already)
    with download_db:
        download_db.create_tables(models)
|