import logging
logger = logging.getLogger(__name__)

from peewee import *
import os
import markdown
import configuration
import datetime

from . import helpers

downloads_config = configuration.config["downloads"]

FILE_SIZE_THRESHOLD = 15 * 1024 * 1024  # 15MB

# set the nature of the db at runtime
download_db = DatabaseProxy()


class DownloadBaseModel(Model):
    """Base model that binds every table of this module to ``download_db``."""

    class Meta:
        database = download_db


## == Article related models == ##
class ArticleDownload(DownloadBaseModel):
    """One article: its URL, fetched metadata, storage location and status flags.

    Authors and related files live in ``ArticleAuthor`` / ``ArticleRelated``
    and are reachable through the ``authors`` and ``related`` back-references.
    """

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')

    @property
    def is_title_bad(self):  # add incrementally
        """Return True when the title is empty or contains a known error marker."""
        return "PUR-Abo" in self.title \
            or "Redirecting" in self.title \
            or "Error while running fetch" in self.title \
            or self.title == ""

    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')

    file_name = TextField(default='')

    @property
    def save_path(self):
        """Local directory (with trailing slash) for this article: ``<local_storage_path>/<year>/<month name>/``."""
        return f"{downloads_config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    def _nas_location(self, file_name=""):
        """Build the ``"NAS: ..."`` location string for a file of this article.

        Args:
            file_name: name of the file to locate; when empty, the article's
                own ``file_name`` is used.

        Returns:
            The location string, or ``None`` when ``download_date`` is unset.
        """
        if not self.download_date:
            return None
        target = file_name or self.file_name
        return f"NAS: {downloads_config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{target}"

    @property
    def fname_nas(self):
        """NAS location string of the article's own file (``None`` without a download date).

        A property getter cannot receive arguments; use ``_nas_location`` to
        locate any other file (e.g. a related file) in the same NAS folder.
        """
        return self._nas_location()

    @property
    def fname_template(self):
        """Suggested file name: ``"<source> -- <title>"``, with ``.pdf`` appended unless the source is YouTube."""
        if "youtube.com" in self.source_name or "youtu.be" in self.source_name:
            fname = f"{self.source_name} -- {self.title}"
        else:
            fname = f"{self.source_name} -- {self.title}.pdf"
        return helpers.clear_path_name(fname)

    archive_url = TextField(default='')
    # default is the Unix epoch date, computed once when the module is imported
    pub_date = DateField(default=datetime.date.fromtimestamp(0))
    download_date = DateField(default=datetime.date.today)

    slack_ts = FloatField(default=0)  # should be a fixed-length string but float is easier to sort by

    @property
    def slack_ts_full(self):
        """Return ``slack_ts`` as a string, right-padded with zeros to 6 decimals."""
        str_ts = str(self.slack_ts)
        cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1)  # usually there are 6 decimals
        return f"{str_ts}{cut_zeros * '0'}"

    sent = BooleanField(default=False)

    # the UNAME environment variable is read once, when the module is imported
    archived_by = CharField(default=os.getenv("UNAME"))
    # need to know who saved the message because the file needs to be on their computer in order to get verified
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default='')
    verified = IntegerField(default=0)  # 0 = not verified, 1 = verified, -1 = marked as bad

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to.
    # This attribute does not need to be saved in the db

    ## Helpers specific to a single article
    def __str__(self) -> str:
        if self.title != '' and self.source_name != '':
            desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}"
        else:
            desc = f"{self.article_url}"
        return f"ART [{desc}]"

    def mail_info(self):
        """Build the mail body and attachment list for this article.

        Returns:
            A tuple ``(html, files)``: the markdown-rendered message and a list
            of local file paths to attach (empty when the file is reported as
            too big to send and only its NAS location is mentioned).

        Raises:
            Exception: re-raised from ``ensure_file_present`` when the file
                name is missing or the file does not exist locally.
        """
        # displays the summary in a blockquote
        summary = "\n> " + "\n> ".join(self.summary.split("\n"))
        answer_text = f"[{self.article_url}]({self.article_url})\n\n"  # first the url
        answer_files = []

        try:
            self.ensure_file_present()
            answer_text += f"*{self.title}*\n{summary}"
            answer_files.append(self.save_path + self.file_name)
        except Exception as e:
            # str(e) instead of e.args[0]: works for exceptions without args
            # and for those whose first argument is not a string (e.g. OSError)
            msg = str(e)
            logger.error("Article %s has file-issues: %s", self, msg)
            if "file too big" in msg:
                location = f"File too big to send directly. Location on NAS:\n`{self.fname_nas}`"
                answer_text += f"*{self.title}*\n{summary}\n{location}"
            else:  # file not found, or filename not set
                raise  # reraise the exception, so that the caller can handle it

        # then the related files
        if self.related:
            rel_text = "Related files on NAS:"
            for r in self.related:
                fname = r.related_file_name
                # fname_nas is a property and cannot be called with a file name
                rel_text += f"\n• `{self._nas_location(fname)}` "
            answer_text += "\n\n" + rel_text

        return markdown.markdown(answer_text), answer_files

    def set_authors(self, authors):
        """Create one ``ArticleAuthor`` row per author name shorter than 100 characters."""
        for a in authors:
            if len(a) < 100:  # otherwise it's a mismatched string
                ArticleAuthor.create(
                    article=self,
                    author=a
                )

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per related file name.

        Raises:
            Exception: on the first name longer than 255 characters; rows for
                the names before it have already been created at that point.
        """
        for r in related:
            if len(r) > 255:
                raise Exception("Related file name too long for POSTGRES")

            ArticleRelated.create(
                article=self,
                related_file_name=r
            )

    def ensure_file_present(self):
        """Check that the article's file can be sent directly.

        Raises:
            Exception: ``"no filename"`` when ``file_name`` is empty,
                ``"file not found"`` when the file is missing under
                ``save_path``, and ``"file too big"`` when the file is not a
                ``.pdf`` or is larger than ``FILE_SIZE_THRESHOLD``.
        """
        if not self.file_name:
            raise Exception("no filename")
        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            raise Exception("file not found")
        # non-pdf files are reported with the same "file too big" message
        if (os.path.splitext(file_path_abs)[1] != ".pdf") or (os.path.getsize(file_path_abs) > FILE_SIZE_THRESHOLD):
            raise Exception("file too big")


class ArticleAuthor(DownloadBaseModel):
    """One author name attached to an article (back-reference: ``authors``)."""

    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()


class ArticleRelated(DownloadBaseModel):
    # Related files, such as the full text of a paper, audio files, etc.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')


def set_db(download_db_object):
    """Bind the module's database proxy to ``download_db_object`` and create the tables."""
    download_db.initialize(download_db_object)
    with download_db:  # create tables (does nothing if they exist already)
        download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])