A few bugs left in news_fetch, news_check WIP
This commit is contained in:
		| @@ -4,7 +4,6 @@ logger = logging.getLogger(__name__) | ||||
| from peewee import * | ||||
| import os | ||||
| import markdown | ||||
| import re | ||||
| import configuration | ||||
| import datetime | ||||
|  | ||||
| @@ -28,7 +27,7 @@ class ArticleDownload(DownloadBaseModel): | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|      | ||||
|     # fetch then fills in the metadata | ||||
|     title = CharField(default='') | ||||
|     title = TextField(default='') | ||||
|     @property | ||||
|     def is_title_bad(self):  # add incrementally | ||||
|         return "PUR-Abo" in self.title \ | ||||
| @@ -63,7 +62,7 @@ class ArticleDownload(DownloadBaseModel): | ||||
|  | ||||
|      | ||||
|     archive_url = TextField(default = '') | ||||
|     pub_date = DateField(default = '') | ||||
|     pub_date = DateField(default = datetime.date.fromtimestamp(0)) | ||||
|     download_date = DateField(default = datetime.date.today) | ||||
|  | ||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||
| @@ -143,13 +142,17 @@ class ArticleDownload(DownloadBaseModel): | ||||
|  | ||||
|     def set_authors(self, authors): | ||||
|         for a in authors: | ||||
|             ArticleAuthor.create( | ||||
|                 article = self, | ||||
|                 author = a | ||||
|                 ) | ||||
|             if len(a) < 100: # otherwise it's a mismatched string | ||||
|                 ArticleAuthor.create( | ||||
|                     article = self, | ||||
|                     author = a | ||||
|                     ) | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             if len(r) > 255: | ||||
|                 raise Exception("Related file name too long for POSTGRES") | ||||
|  | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
| @@ -182,116 +185,7 @@ class ArticleRelated(DownloadBaseModel): | ||||
|  | ||||
|  | ||||
|  | ||||
| # class Thread(ChatBaseModel): | ||||
| #     """The threads that concern us are only created if the base message contains a URL""" | ||||
| #     thread_ts = FloatField(default = 0) | ||||
| #     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) | ||||
| #     # provides, ts, user, models | ||||
| #     # messages | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.thread_ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, that is a problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros*"0") | ||||
|  | ||||
| #     @property | ||||
| #     def initiator_message(self): | ||||
| #         try: | ||||
| #             return self.messages[0] # TODO check if this needs sorting | ||||
| #         except IndexError: | ||||
| #             logger.warning(f"Thread {self} is empty. How can that be?") | ||||
| #             return None | ||||
|  | ||||
| #     @property | ||||
| #     def message_count(self): | ||||
| #         # logger.warning("message_count was called") | ||||
| #         return self.messages.count() | ||||
|  | ||||
| #     @property | ||||
| #     def last_message(self): | ||||
| #         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation | ||||
| #         return messages[-1] | ||||
|  | ||||
| #     @property | ||||
| #     def is_fully_processed(self) -> bool: | ||||
| #         init_message = self.initiator_message | ||||
| #         if init_message is None: | ||||
| #             return False | ||||
|          | ||||
| #         if init_message.is_processed_override: | ||||
| #             return True | ||||
| #         # this override is set for instance, when no url was sent at all. Then set this thread to be ignored | ||||
|          | ||||
| #         reactions = init_message.reaction | ||||
| #         if not reactions: | ||||
| #             return False | ||||
| #         else: | ||||
| #             r = reactions[0].type # can and should only have one reaction | ||||
| #             return r == "white_check_mark" \ | ||||
| #                 or r == "x" | ||||
|  | ||||
|  | ||||
|      | ||||
| # class Message(ChatBaseModel): | ||||
| #     ts = FloatField(unique=True) #for sorting | ||||
| #     channel_id = CharField(default='') | ||||
| #     user = ForeignKeyField(User, backref="messages") | ||||
| #     text = TextField(default='') | ||||
| #     thread = ForeignKeyField(Thread, backref="messages", default=None) | ||||
| #     file_type = CharField(default='') | ||||
| #     perma_link = CharField(default='') | ||||
| #     is_processed_override = BooleanField(default=False) | ||||
| #     # reaction | ||||
|  | ||||
| #     def __str__(self) -> str: | ||||
| #         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, that is a problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros * "0") | ||||
|  | ||||
|  | ||||
| #     @property | ||||
| #     def urls(self): | ||||
| #         pattern = r"<(.*?)>" | ||||
| #         matches = re.findall(pattern, self.text) | ||||
| #         matches = [m for m in matches if "." in m] | ||||
|          | ||||
| #         new_matches = [] | ||||
| #         for m in matches: | ||||
| #             if "." in m:  # must contain a tld, right? | ||||
| #                 # further complication: Slack automatically abbreviates URLs in the format: | ||||
| #                 # <url|link preview>. Luckily for us, "|" is a character discouraged in URLs, meaning we can "safely" split on it and keep the first half | ||||
| #                 if "|" in m: | ||||
| #                     keep = m.split("|")[0] | ||||
| #                 else: | ||||
| #                     keep = m | ||||
| #                 new_matches.append(keep) | ||||
| #         return new_matches | ||||
|      | ||||
| #     @property | ||||
| #     def is_by_human(self): | ||||
| #         return self.user.user_id != slack_config["bot_id"] | ||||
|  | ||||
|      | ||||
| #     @property | ||||
| #     def has_single_url(self): | ||||
| #         return len(self.urls) == 1 | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_db(download_db_object): | ||||
|     download_db.initialize(download_db_object) | ||||
|     with download_db: # create tables (does nothing if they exist already) | ||||
|         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user