import datetime
import logging
import os

import markdown
from peewee import *

import configuration
from . import helpers

logger = logging.getLogger(__name__)

config = configuration.main_config["DOWNLOADS"]
slack_config = configuration.main_config["SLACK"]
FILE_SIZE_THRESHOLD = 15 * 1024 * 1024  # 15MB

# set the nature of the db at runtime
download_db = DatabaseProxy()


class DownloadBaseModel(Model):
    """Base model binding every table of this module to the runtime-chosen db."""

    class Meta:
        database = download_db


## == Article related models == ##
class ArticleDownload(DownloadBaseModel):
    """One article to download, together with its metadata and delivery state."""

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')

    @property
    def is_title_bad(self):
        """Whether the title contains one of the known failure markers."""
        # add incrementally
        return "PUR-Abo" in self.title \
            or "Redirecting" in self.title \
            or "Error while running fetch" in self.title

    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')

    file_name = TextField(default='')

    @property
    def save_path(self):
        """Local directory (with trailing slash) for this article: <root>/<year>/<month name>/."""
        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    def _nas_location(self, file_name=""):
        """Return the "NAS: ..." location string for ``file_name``.

        Falls back to ``self.file_name`` when ``file_name`` is empty. Returns
        ``None`` when the article has no download date, because the remote
        path is built from the download year and month.
        """
        if not self.download_date:
            return None
        name = file_name or self.file_name
        return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{name}"

    @property
    def fname_nas(self):
        """NAS location of the article's own file, or ``None`` without a download date.

        A property getter only ever receives ``self``, so it cannot take a file
        name; locations of other files go through ``_nas_location``.
        """
        return self._nas_location()

    @property
    def fname_template(self):
        """Sanitized file name built from source and title; '.pdf' is appended except for YouTube sources."""
        if "youtube.com" in self.source_name or "youtu.be" in self.source_name:
            fname = f"{self.source_name} -- {self.title}"
        else:
            fname = f"{self.source_name} -- {self.title}.pdf"
        return helpers.clear_path_name(fname)

    archive_url = TextField(default='')
    pub_date = DateField(default=datetime.date.fromtimestamp(0))
    download_date = DateField(default=datetime.date.today)

    slack_ts = FloatField(default=0)  # should be a fixed-length string but float is easier to sort by

    @property
    def slack_ts_full(self):
        """``slack_ts`` as a string, right-padded with zeros to 6 decimals."""
        str_ts = str(self.slack_ts)
        cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1)  # usually there are 6 decimals
        return f"{str_ts}{cut_zeros * '0'}"

    sent = BooleanField(default=False)

    # need to know who saved the message because the file needs to be on their computer in order to get verified
    archived_by = CharField(default=os.getenv("UNAME"))
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default='')
    verified = IntegerField(default=0)  # 0 = not verified, 1 = verified, -1 = marked as bad

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to.
    # This attribute does not need to be saved in the db

    ## Helpers specific to a single article
    def __str__(self) -> str:
        if self.title != '' and self.source_name != '':
            desc = f"{helpers.shorten_name(self.title)} -- {self.source_name}"
        else:
            desc = f"{self.article_url}"
        return f"ART [{desc}]"

    def mail_info(self):
        """Build the mail body and attachment list for this article.

        Returns:
            ``None`` when the article has no file name (nothing should be
            sent), otherwise a tuple ``(html, files)``: the markdown-rendered
            message and the list of local file paths to attach.
        """
        status = self.file_status
        if status == 1:  # file_name was empty
            return None  # there has been an error do not send any message

        # displays the summary in a blockquote
        summary = "\n> " + "\n> ".join(self.summary.split("\n"))
        answer_text = f"[{self.article_url}]({self.article_url})\n\n"  # first the url
        answer_files = []

        if status == 2:  # no file found at specified location
            answer_text += f"*{self.title}*\n{summary}\nFilename: {self.file_name}"
        elif status == 3:  # file found but deemed too big
            location = f"File not sent directly. Location on NAS:\n`{self.fname_nas}`"
            answer_text += f"*{self.title}*\n{summary}\n{location}"
        else:  # everything nominal
            answer_text += f"*{self.title}*\n{summary}"
            answer_files.append(self.save_path + self.file_name)

        # then the related files
        if self.related:
            rel_text = "Related files on NAS:"
            for r in self.related:
                # fname_nas is a property and cannot be called with a name,
                # so related files are resolved through the helper method
                rel_text += f"\n• `{self._nas_location(r.related_file_name)}` "
            answer_text += "\n\n" + rel_text

        return markdown.markdown(answer_text), answer_files

    def set_authors(self, authors):
        """Create one ``ArticleAuthor`` row per author name shorter than 100 characters."""
        for a in authors:
            if len(a) < 100:  # otherwise it's a mismatched string
                ArticleAuthor.create(
                    article=self,
                    author=a
                )

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per related file name.

        Raises:
            Exception: if a name is longer than 255 characters.
        """
        for r in related:
            if len(r) > 255:
                raise Exception("Related file name too long for POSTGRES")

            ArticleRelated.create(
                article=self,
                related_file_name=r
            )

    @property
    def file_status(self):
        """0 = file exists, 1 = no file name!, 2 = file does not exist, 3 = file exists but is not a .pdf or is too large"""
        if not self.file_name:
            logger.error(f"Article {self} has no filename!")
            return 1  # mail_info relies on 1 to suppress the message entirely

        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
            return 2

        # non-pdf files are handled like oversized ones: referenced, not attached
        if (os.path.splitext(file_path_abs)[1] != ".pdf") or (os.path.getsize(file_path_abs) > FILE_SIZE_THRESHOLD):
            logger.warning(f"Article {self} has a file that exceeds the file size limit.")
            return 3

        return 0


class ArticleAuthor(DownloadBaseModel):
    """Author name attached to an article (reachable as ``article.authors``)."""

    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()


class ArticleRelated(DownloadBaseModel):
    # Related files, such as the full text of a paper, audio files, etc.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')


def set_db(download_db_object):
    """Bind the module-level database proxy to ``download_db_object`` and create the tables."""
    download_db.initialize(download_db_object)
    with download_db:  # create tables (does nothing if they exist already)
        download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])