new component - upload to NAS

This commit is contained in:
Remy Moll
2022-07-23 17:21:00 +02:00
parent 79e3f54955
commit 8e46f30f07
29 changed files with 132 additions and 63 deletions


@@ -0,0 +1,277 @@
import logging
import configuration
import requests
import os
import time
from threading import Thread
from slack_sdk.errors import SlackApiError
logger = logging.getLogger(__name__)
config = configuration.parsed["SLACK"]
models = configuration.models
slack_client = "dummy"
LATEST_RECORDED_REACTION = 0
def init(client) -> None:
global slack_client
slack_client = client
global LATEST_RECORDED_REACTION
try:
LATEST_RECORDED_REACTION = models.Reaction.select(models.Reaction.id).order_by(models.Reaction.id)[-1]
except IndexError: # the query is empty: no reactions have been recorded yet
LATEST_RECORDED_REACTION = 0
# fetch all the messages we could have possibly missed
logger.info("Querying missed messages, threads and reactions. This can take some time.")
fetch_missed_channel_messages() # not threaded
t = Thread(target = fetch_missed_channel_reactions) # threaded, runs in background (usually takes a long time)
t.start()
if os.getenv("REDUCEDFETCH", "false") == "true":
logger.warning("Only fetching empty threads for bot messages because 'REDUCEDFETCH=true'")
fetch_missed_thread_messages(reduced=True)
else: # do the full thread fetch; the reaction fetch keeps running in its background thread
fetch_missed_thread_messages()
def get_unhandled_messages():
"""Gets all messages that have not yet been handled, be it by mistake or by downtime
As the message handler makes no distinction between channel messages and thread messages,
we don't have to worry about them here.
"""
threaded_objects = []
for t in models.Thread.select():
if t.message_count > 1: # if only one message was written, it is the channel message
msg = t.last_message
if msg.is_by_human:
threaded_objects.append(msg)
# else don't, nothing to process
logger.info(f"Set {len(threaded_objects)} thread-messages as not yet handled.")
channel_objects = [t.initiator_message for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)]
logger.info(f"Set {len(channel_objects)} channel-messages as not yet handled.")
reaction_objects = list(models.Reaction.select().where(models.Reaction.id > LATEST_RECORDED_REACTION))
logger.info(f"Set {len(reaction_objects)} reactions as not yet handled.")
# the ones newer than the last before the fetch
all_messages = channel_objects + threaded_objects
return all_messages, reaction_objects
def fetch_missed_channel_messages():
# find the latest processed message timestamp so that only newer messages are fetched
presaved = models.Message.select().order_by(models.Message.ts)
if not presaved:
last_ts = 0
else:
last_message = presaved[-1]
last_ts = last_message.slack_ts
result = slack_client.conversations_history(
channel=config["archive_id"],
oldest=last_ts
)
new_messages = result.get("messages", [])
# # filter the last one, it is a duplicate! (only if the db is not empty!)
# if last_ts != 0 and len(new_messages) != 0:
# new_messages.pop(-1)
new_fetches = 0
for m in new_messages:
# print(m)
message_dict_to_model(m)
new_fetches += 1
refetch = result.get("has_more", False)
while refetch: # we have not actually fetched them all
try:
result = slack_client.conversations_history(
channel = config["archive_id"],
cursor = result["response_metadata"]["next_cursor"],
oldest = last_ts
) # fetches the next batch of messages (older than the ones already fetched), following the pagination cursor
refetch = result.get("has_more", False)
new_messages = result.get("messages", [])
for m in new_messages:
message_dict_to_model(m)
new_fetches += 1
except SlackApiError: # Most likely a rate-limit
logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"]))
time.sleep(config["api_wait_time"])
refetch = True
logger.info(f"Fetched {new_fetches} new channel messages.")
def fetch_missed_thread_messages(reduced=False):
"""After having gotten all base-threads, we need to fetch all their replies"""
# I don't know of a better way: we need to fetch this for each and every thread (except if it is marked as permanently solved)
logger.info("Starting fetch of thread messages...")
if reduced:
threads = [t for t in models.Thread.select() if (t.message_count == 1 and not t.is_fully_processed)]
# this only fetches completely empty threads, which might be because the bot-message was not yet saved to the db.
# once we have all the bot messages, the remaining empty threads are the ones that still need processing.
else:
threads = [t for t in models.Thread.select() if not t.is_fully_processed]
logger.info(f"Fetching history for {len(threads)} empty threads")
new_messages = []
for i,t in enumerate(threads):
try:
messages = slack_client.conversations_replies(
channel = config["archive_id"],
ts = t.slack_ts,
oldest = t.messages[-1].slack_ts
)["messages"]
except SlackApiError:
logger.error("Hit rate limit while querying threaded messages, retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
time.sleep(int(config["api_wait_time"]))
messages = slack_client.conversations_replies(
channel = config["archive_id"],
ts = t.slack_ts,
oldest = t.messages[-1].slack_ts
)["messages"]
messages.pop(0) # the first message is the one posted in the channel. We already processed it!
for m in messages:
# only append *new* messages
res = message_dict_to_model(m)
if res:
new_messages.append(res)
logger.info("Fetched {} new threaded messages.".format(len(new_messages)))
def fetch_missed_channel_reactions():
logger.info("Starting background fetch of channel reactions...")
threads = [t for t in models.Thread.select() if not t.is_fully_processed]
for i,t in enumerate(threads):
try:
query = slack_client.reactions_get(
channel = config["archive_id"],
timestamp = t.slack_ts
)
reactions = query.get("message", []).get("reactions", []) # default = []
except SlackApiError: # probably a rate_limit:
logger.error("Hit rate limit while querying reactions. retrying in {}s ({}/{} queries elapsed)".format(config["api_wait_time"], i, len(threads)))
time.sleep(int(config["api_wait_time"]))
reactions = query.get("message", []).get("reactions", [])
for r in reactions:
reaction_dict_to_model(r, t)
# Helpers for message conversion to db-objects
def reaction_dict_to_model(reaction, thread=None):
if thread is None:
m_ts = reaction["item"]["ts"]
message = models.Message.get(ts = float(m_ts))
thread = message.thread
if "name" in reaction.keys(): # fetched through manual api query
content = reaction["name"]
elif "reaction" in reaction.keys(): # fetched through events
content = reaction["reaction"]
else:
logger.error(f"Weird reaction received: {reaction}")
return None
r, _ = models.Reaction.get_or_create(
type = content,
message = thread.initiator_message
)
logger.info("Saved reaction [{}]".format(content))
return r
def message_dict_to_model(message):
if message["type"] == "message":
thread_ts = message["thread_ts"] if "thread_ts" in message else message["ts"]
uid = message.get("user", "BAD USER")
if uid == "BAD USER":
logger.critical("Message has no user?? {}".format(message))
return None
user, _ = models.User.get_or_create(user_id = uid)
thread, _ = models.Thread.get_or_create(thread_ts = thread_ts)
m, new = models.Message.get_or_create(
user = user,
thread = thread,
ts = message["ts"],
channel_id = config["archive_id"],
text = message["text"]
)
logger.info(f"Saved: {m} ({'new' if new else 'old'})")
files = message.get("files", [])
if len(files) >= 1:
f = files[0] # only the first attached file is handled
m.file_type = f["filetype"]
m.perma_link = f["url_private_download"]
m.save()
logger.info(f"Saved {m.file_type}-file for message (id={m.id})")
if new:
return m
else:
return None
else:
logger.warning("What should I do of {}".format(message))
return None
def say_substitute(*args, **kwargs):
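# stand-in for slack_bolt's say(): posts directly to the archive channel, used where no event context provides a say callback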
logger.info("Now sending message through say-substitute: {}".format(" - ".join(args)))
slack_client.chat_postMessage(
channel=config["archive_id"],
text=" - ".join(args),
**kwargs
)
def save_as_related_file(url, article_object):
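# download the file behind the private slack url; the bot token is required as bearer authorization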
r = requests.get(url, headers={"Authorization": "Bearer {}".format(slack_client.token)})
saveto = article_object.save_path
ftype = url[url.rfind(".") + 1:]
fname = "{} - related no {}.{}".format(
article_object.file_name.replace(".pdf",""),
len(article_object.related) + 1,
ftype
)
with open(os.path.join(saveto, fname), "wb") as f:
f.write(r.content)
article_object.set_related([fname])
logger.info("Added {} to model {}".format(fname, article_object))
return fname
def react_file_path_message(fname, article_object):
saveto = article_object.save_path
file_path = os.path.join(saveto, fname)
if os.path.exists(file_path):
article_object.set_related([fname])
logger.info("Added {} to model {}".format(fname, article_object))
return True
else:
return False
def is_message_in_archiving(message) -> bool:
if isinstance(message, dict):
return message["channel"] == config["archive_id"]
else:
return message.channel_id == config["archive_id"]
def is_reaction_in_archiving(event) -> bool:
if isinstance(event, dict):
return event["item"]["channel"] == config["archive_id"]
else:
return event.message.channel_id == config["archive_id"]


@@ -0,0 +1,184 @@
from slack_bolt import App
from slack_bolt.adapter.socket_mode import SocketModeHandler
import logging
import configuration
from . import message_helpers
config = configuration.parsed["SLACK"]
models = configuration.models
class BotApp(App):
logger = logging.getLogger(__name__)
def __init__(self, callback, *args, **kwargs):
super().__init__(*args, **kwargs)
self.callback = callback
def start(self):
message_helpers.init(self.client)
missed_messages, missed_reactions = message_helpers.get_unhandled_messages()
[self.handle_incoming_message(m) for m in missed_messages]
[self.handle_incoming_reaction(r) for r in missed_reactions]
# self.react_missed_reactions(missed_reactions)
# self.react_missed_messages(missed_messages)
self.startup_status()
def handle_incoming_reaction(self, reaction):
if isinstance(reaction, dict): #else: the reaction is already being passed as a model
# CAUTION: watch out for 'changed reactions'; those are nasty (usually when adding an url)
reaction = message_helpers.reaction_dict_to_model(reaction)
thread = reaction.message.thread
article_object = thread.article
if article_object is not None:
reaction = reaction.type
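# a white_check_mark marks the article as verified (1); any other reaction rejects it (-1)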
status = 1 if reaction == "white_check_mark" else -1
# self.logger.info(f"Applying reaction {reaction} to its root message.")
article_object.verified = status
article_object.save()
def handle_incoming_message(self, message):
"""Reacts to all messages inside channel archiving. Must then
distinguish between threaded replies and new requests
and react accordingly"""
if isinstance(message, dict): #else: the message is already being passed as a model
# CAUTION: filter out 'message_changed' events; those are nasty (usually triggered when an url is added)
if message.get("subtype", "not bad") == "message_changed":
return False
message = message_helpers.message_dict_to_model(message)
# First check: belongs to thread?
is_threaded = message.thread.message_count > 1 and message != message.thread.initiator_message
if is_threaded:
self.incoming_thread_message(message)
else:
self.incoming_channel_message(message)
def incoming_thread_message(self, message):
if message.user.user_id == config["bot_id"]:
return True # ignore the files uploaded by the bot. We handled them already!
thread = message.thread
if thread.is_fully_processed:
return True
self.logger.info("Receiving thread-message")
self.respond_thread_message(message)
def incoming_channel_message(self, message):
self.logger.info(f"Handling message {message} ({len(message.urls)} urls)")
if not message.urls: # no urls in a root-message => IGNORE
message.is_processed_override = True
message.save()
return
# ensure the thread is still empty; this scenario is only encountered in testing, but filter it anyway
if message.thread.message_count > 1:
self.logger.info("Discarded message because it is actually processed.")
return
if len(message.urls) > 1:
message_helpers.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.thread.slack_ts)
self.callback(message)
# for url in message.urls:
# self.callback(url, message)
# stop here!
def respond_thread_message(self, message, say=message_helpers.say_substitute):
thread = message.thread
article = thread.article
if message.perma_link: # file upload means new data
fname = message_helpers.save_as_related_file(message.perma_link, article)
say("File was saved as 'related file' under `{}`.".format(fname),
thread_ts=thread.slack_ts
)
else: # either a pointer to a new file (too large to upload), or trash
success = message_helpers.react_file_path_message(message.text, article)
if success:
say("File was saved as 'related file'", thread_ts=thread.slack_ts)
else:
self.logger.error("User replied to thread {} but the response did not contain a file/path".format(thread))
say("Cannot process response without associated file.",
thread_ts=thread.slack_ts
)
def respond_channel_message(self, thread, say=message_helpers.say_substitute):
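# post the article's results back into its thread, uploading the generated file when one exists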
article = thread.article
answers = article.slack_info
for a in answers:
if a["file_path"]:
try: # either, a["file_path"] does not exist, or the upload resulted in an error
self.client.files_upload(
channels = config["archive_id"],
initial_comment = f"<@{config['responsible_id']}> \n {a['reply_text']}",
file = a["file_path"],
thread_ts = thread.slack_ts
)
status = True
except Exception: # file missing or upload failed
say(
"File {} could not be uploaded.".format(a["file_path"]),
thread_ts=thread.slack_ts
)
status = False
else: # anticipated that there is no file!
say(
f"<@{config['responsible_id']}> \n {a['reply_text']}",
thread_ts=thread.slack_ts
)
status = True
def startup_status(self):
threads = list(models.Thread.select())
all_threads = len(threads)
fully_processed = len([t for t in threads if t.is_fully_processed])
fully_unprocessed = len([t for t in threads if t.message_count == 1])
articles_unprocessed = len(models.ArticleDownload.select().where(models.ArticleDownload.verified < 1))
self.logger.info(f"[bold]STATUS[/bold]: Fully processed {fully_processed}/{all_threads} threads. {fully_unprocessed} threads have 0 replies. Article-objects to verify: {articles_unprocessed}", extra={"markup": True})
class BotRunner():
"""Stupid encapsulation so that we can apply the slack decorators to the BotApp"""
def __init__(self, callback, *args, **kwargs) -> None:
self.bot_worker = BotApp(callback, token=config["auth_token"])
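# register the event handlers on the BotApp instance; the matchers restrict both to the archive channel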
@self.bot_worker.event(event="message", matchers=[message_helpers.is_message_in_archiving])
def handle_incoming_message(message, say):
return self.bot_worker.handle_incoming_message(message)
@self.bot_worker.event(event="reaction_added", matchers=[message_helpers.is_reaction_in_archiving])
def handle_incoming_reaction(event, say):
return self.bot_worker.handle_incoming_reaction(event)
# target = self.launch
# super().__init__(target=target)
def start(self):
self.bot_worker.start()
SocketModeHandler(self.bot_worker, config["app_token"]).start()
# def respond_to_message(self, message):
# self.bot_worker.handle_incoming_message(message)