Fixed bug where duplicate threads would not get responded

2022-04-26 10:32:41 +02:00
parent 024da446e7
commit 246729d376
6 changed files with 56 additions and 41 deletions
--- a/app/utils_slack/message_helpers.py
+++ b/app/utils_slack/message_helpers.py
@@ -3,8 +3,8 @@ import configuration
 import requests
 import os
 import time
-import asyncio
 import sys
+from threading import Thread
 from slack_sdk.errors import SlackApiError

 logger = logging.getLogger(__name__)
@@ -18,26 +18,29 @@ def init(client) -> None:
    global slack_client
    slack_client = client

-    # config["archive_id"] = channel_id
+    global LATEST_RECORDED_REACTION
    try:
        LATEST_RECORDED_REACTION = models.Reaction.select(models.Reaction.id).order_by("id")[-1]
    except IndexError: #query is actually empty, we have never fetched any messages until now
        LATEST_RECORDED_REACTION = 0    
-    # fetch all te messages we could have possibly missed
    
+    # fetch all te messages we could have possibly missed
    logger.info("Querying missed messages, threads and reactions. This can take some time.")
-    fetch_missed_channel_messages()
-    if "nofetch" in sys.argv:
-        logger.info("Omitted update of reactions and thread messages because of argument 'nofetch'.")
+    fetch_missed_channel_messages() # not threaded
+    t = Thread(target = fetch_missed_channel_reactions) # threaded, runs in background (usually takes a long time)
+    t.start()
+
+    if "reducedfetch" in sys.argv:
+        logger.warning("Only fetching empty threads for bot messages because of argument 'reducedfetch'")
+        fetch_missed_thread_messages(reduced=True)
    else:    # perform these two asyncronously
-        async def run_async():
-            await asyncio.gather(fetch_missed_channel_reactions(), fetch_missed_thread_messages())
-        asyncio.run(run_async())
+        fetch_missed_thread_messages()
+    


-def get_past_messages():
+def get_unhandled_messages():
    """Gets all messages that have not yet been handled, be it by mistake or by downtime
-    As the message handler mkaes no distinction between channel messages and thread messages,
+    As the message handler makes no distinction between channel messages and thread messages,
    we don't have to worry about them here.
    """

@@ -51,10 +54,11 @@ def get_past_messages():
    logger.info(f"Set {len(threaded_objects)} thread-messages as not yet handled.")


-    channel_objects = [t.initiator_message for t in models.Thread.select() if t.message_count == 1 and not t.is_fully_processed]
+    channel_objects = [t.initiator_message for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)]
    logger.info(f"Set {len(channel_objects)} channel-messages as not yet handled.")
    
    reaction_objects = list(models.Reaction.select().where(models.Reaction.id > LATEST_RECORDED_REACTION))
+    logger.info(f"Set {len(reaction_objects)} reactions as not yet handled.")
    # the ones newer than the last before the fetch
    
    all_messages = channel_objects + threaded_objects
@@ -108,11 +112,17 @@ def fetch_missed_channel_messages():
    logger.info(f"Fetched {new_fetches} new channel messages.")


-async def fetch_missed_thread_messages():
+def fetch_missed_thread_messages(reduced=False):
    """After having gotten all base-threads, we need to fetch all their replies"""        
    # I don't know of a better way: we need to fetch this for each and every thread (except if it is marked as permanently solved)
-    logger.info("Starting async fetch of thread messages...")
-    threads = [t for t in models.Thread.select() if not t.is_fully_processed]
+    logger.info("Starting fetch of thread messages...")
+    if reduced:
+        threads = [t for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)]
+        # this only fetches completely empty threads, which might be because the bot-message was not yet saved to the db.
+        # once we got all the bot-messages the remaining empty threads will be the ones we need to process.
+    else:
+        threads = [t for t in models.Thread.select() if not t.is_fully_processed]
+    logger.info(f"Fetching history for {len(threads)} empty threads")
    new_messages = []
    for i,t in enumerate(threads):
        try:
@@ -123,7 +133,7 @@ async def fetch_missed_thread_messages():
            )["messages"]
        except SlackApiError:
            logger.error("Hit rate limit while querying threaded messages, retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
-            await asyncio.sleep(int(config["api_wait_time"]))
+            time.sleep(int(config["api_wait_time"]))
            messages = slack_client.conversations_replies(
                channel = config["archive_id"],
                ts = t.slack_ts,
@@ -140,8 +150,8 @@ async def fetch_missed_thread_messages():
    logger.info("Fetched {} new threaded messages.".format(len(new_messages)))


-async def fetch_missed_channel_reactions():
-    logger.info("Starting async fetch of channel reactions...")
+def fetch_missed_channel_reactions():
+    logger.info("Starting background fetch of channel reactions...")
    threads = [t for t in models.Thread.select() if not t.is_fully_processed]
    for i,t in enumerate(threads):
        try:
@@ -152,7 +162,7 @@ async def fetch_missed_channel_reactions():
            reactions = query["message"].get("reactions", []) # default = []
        except SlackApiError: # probably a rate_limit:
            logger.error("Hit rate limit while querying reactions. retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
-            await asyncio.sleep(int(config["api_wait_time"]))
+            time.sleep(int(config["api_wait_time"]))
            reactions = query["message"].get("reactions", [])

        for r in reactions:
--- a/app/utils_slack/runner.py
+++ b/app/utils_slack/runner.py
@@ -1,9 +1,7 @@
-from threading import Thread
 from slack_bolt import App
 from slack_bolt.adapter.socket_mode import SocketModeHandler

 import logging
-from rich.rule import Rule
 import configuration

 from . import message_helpers
@@ -18,12 +16,11 @@ class BotApp(App):
    def __init__(self, callback, *args, **kwargs):

        super().__init__(*args, **kwargs)
-        # models = models
        self.callback = callback

    def start(self):
        message_helpers.init(self.client)
-        missed_messages, missed_reactions = message_helpers.get_past_messages()
+        missed_messages, missed_reactions = message_helpers.get_unhandled_messages()

        [self.handle_incoming_message(m) for m in missed_messages]
        [self.handle_incoming_reaction(r) for r in missed_reactions]
@@ -122,10 +119,8 @@ class BotApp(App):
                )


-    def respond_channel_message(self, article, say=message_helpers.say_substitute):
-        # extra={"markup": True}
-        # self.logger.info(Rule(url[:min(len(url), 30)]))
-        thread = article.slack_thread.execute()[0]
+    def respond_channel_message(self, thread, say=message_helpers.say_substitute):
+        article = thread.article
        answers = article.slack_info
        for a in answers:
            if a["file_path"]:
@@ -149,7 +144,6 @@ class BotApp(App):
                    thread_ts=thread.slack_ts
                )
                status = True
-        # self.logger.info(Rule(f"Fully handled (success={status})"))
        

    def startup_status(self):