192 lines
7.0 KiB
Python

import logging
logger = logging.getLogger(__name__)
from peewee import *
import os
import markdown
import configuration
import datetime
from . import helpers
config = configuration.main_config["DOWNLOADS"]
slack_config = configuration.main_config["SLACK"]
# set the nature of the db at runtime
download_db = DatabaseProxy()
class DownloadBaseModel(Model):
class Meta:
database = download_db
## == Article related models == ##
class ArticleDownload(DownloadBaseModel):
# in the beginning this is all we have
article_url = TextField(default = '', unique=True)
# fetch then fills in the metadata
title = TextField(default='')
@property
def is_title_bad(self): # add incrementally
return "PUR-Abo" in self.title \
or "Redirecting" in self.title \
or "Error while running fetch" in self.title
summary = TextField(default = '')
source_name = CharField(default = '')
language = CharField(default = '')
file_name = TextField(default = '')
@property
def save_path(self):
return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"
@property
def fname_nas(self, file_name=""):
if self.download_date:
if file_name:
return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{file_name}"
else: # return the self. name
return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{self.file_name}"
else:
return None
@property
def fname_template(self):
if "youtube.com" in self.source_name or "youtu.be" in self.source_name:
fname = f"{self.source_name} -- {self.title}"
else:
fname = f"{self.source_name} -- {self.title}.pdf"
return helpers.clear_path_name(fname)
archive_url = TextField(default = '')
pub_date = DateField(default = datetime.date.fromtimestamp(0))
download_date = DateField(default = datetime.date.today)
slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
@property
def slack_ts_full(self):
str_ts = str(self.slack_ts)
cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there a 6 decimals
return f"{str_ts}{cut_zeros * '0'}"
sent = BooleanField(default = False)
archived_by = CharField(default = os.getenv("UNAME"))
# need to know who saved the message because the file needs to be on their computer in order to get verified
# verification happens in a different app, but the model has the fields here as well
comment = TextField(default = '')
verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad
# authors
# keywords
# ... are added through foreignkeys
# we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
## Helpers specific to a single article
def __str__(self) -> str:
if self.title != '' and self.source_name != '':
desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}"
else:
desc = f"{self.article_url}"
return f"ART [{desc}]"
@property
def slack_info(self):
status = [":x: No better version available", ":gear: Verification pending", ":white_check_mark: Verified by human"][self.verified + 1]
content = "\n>" + "\n>".join(self.summary.split("\n"))
file_status, msg = self.file_status()
if not file_status:
return [msg]
# everything alright: generate real content
# first the base file
if self.file_name[-4:] == ".pdf":
answer = [{ # main reply with the base pdf
"reply_text" : f"*{self.title}*\n{status}\n{content}",
"file_path" : self.save_path + self.file_name
}]
else: # don't upload if the file is too big!
location = f"Not uploaded to slack, but the file will be on the NAS:\n`{self.fname_nas}`"
answer = [{ # main reply with the base pdf
"reply_text" : f"*{self.title}*\n{status}\n{content}\n{location}",
"file_path" : None
}]
# then the related files
rel_text = ""
for r in self.related:
fname = r.related_file_name
lentry = "\n• `{}` ".format(self.fname_nas(fname))
if fname[-4:] == ".pdf": # this is a manageable file, directly upload
f_ret = self.save_path + fname
answer.append({"reply_text":"", "file_path" : f_ret})
else: # not pdf <=> too large. Don't upload but mention its existence
lentry += "(not uploaded to slack, but the file will be on the NAS)"
rel_text += lentry
if rel_text:
rel_text = answer[0]["reply_text"] = answer[0]["reply_text"] + "\nRelated files:\n" + rel_text
return answer
@property
def mail_info(self):
base = [{"reply_text": f"[{self.article_url}]({self.article_url})\n", "file_path":None}] + self.slack_info
return [{"reply_text": markdown.markdown(m["reply_text"]), "file_path": m["file_path"]} for m in base]
def set_authors(self, authors):
for a in authors:
if len(a) < 100: # otherwise it's a mismatched string
ArticleAuthor.create(
article = self,
author = a
)
def set_related(self, related):
for r in related:
if len(r) > 255:
raise Exception("Related file name too long for POSTGRES")
ArticleRelated.create(
article = self,
related_file_name = r
)
def file_status(self):
if not self.file_name:
logger.error(f"Article {self} has no filename!")
return False, {"reply_text": "Download failed, no file was saved.", "file_path": None}
file_path_abs = self.save_path + self.file_name
if not os.path.exists(file_path_abs):
logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None}
return True, {}
class ArticleAuthor(DownloadBaseModel):
article = ForeignKeyField(ArticleDownload, backref='authors')
author = CharField()
class ArticleRelated(DownloadBaseModel):
# Related files, such as the full text of a paper, audio files, etc.
article = ForeignKeyField(ArticleDownload, backref='related')
related_file_name = TextField(default = '')
def set_db(download_db_object):
download_db.initialize(download_db_object)
with download_db: # create tables (does nothing if they exist already)
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])