220 lines
7.8 KiB
Python
220 lines
7.8 KiB
Python
from slack_bolt import App
|
|
from slack_bolt.adapter.socket_mode import SocketModeHandler
|
|
from slack_sdk.errors import SlackApiError
|
|
|
|
import logging
|
|
import re
|
|
import time
|
|
|
|
import configuration
|
|
# Slack-specific settings: the "SLACK" entry of the project's main configuration.
config = configuration.main_config["SLACK"]

# Model definitions re-exported by the configuration module (used for DB queries below).
models = configuration.models
|
|
|
|
class MessageIsUnwanted(Exception):
    """Signal that a Slack message must not be processed.

    Raised when the message is threaded (a reply to another message) or is an
    unusual payload such as an edit or a deletion.
    """
|
|
|
|
class Message:
    """A top-level Slack channel message, reduced to the fields the bot uses.

    Attributes:
        ts: Value of the payload's ``ts`` key.
        user_id: Value of the payload's ``user`` key, or ``"BAD USER"`` when
            the payload carries none.
        text: Value of the payload's ``text`` key.
    """

    # Annotations only: the original assigned the `str` type object itself as
    # a class attribute, which was never a meaningful default.
    ts: str
    user_id: str
    text: str
    logger = logging.getLogger(__name__)

    def __init__(self, message_dict):
        """Build a message from a raw Slack payload.

        Args:
            message_dict: Raw payload dict. Plain messages must provide
                ``ts`` and ``text``; ``user`` is optional.

        Raises:
            MessageIsUnwanted: If the payload is an edit
                (``subtype == "message_changed"``), a reply inside a thread,
                or anything whose ``type`` is not ``"message"``.
        """
        if message_dict.get("subtype", "not bad") == "message_changed":
            raise MessageIsUnwanted()

        # `.get` so that a payload without a "type" key is rejected through
        # the same MessageIsUnwanted path instead of leaking a KeyError.
        if message_dict.get("type") != "message":
            self.logger.warning(f"What should I do of {message_dict}")
            raise MessageIsUnwanted()

        # A thread root has thread_ts == ts; any other thread_ts means the
        # message is a reply to another message.
        if "thread_ts" in message_dict and message_dict["thread_ts"] != message_dict["ts"]:
            raise MessageIsUnwanted()

        self.user_id = message_dict.get("user", "BAD USER")
        self.ts = message_dict["ts"]
        self.text = message_dict["text"]

    def __str__(self) -> str:
        return f"MSG [{self.text}]"

    @property
    def urls(self):
        """Return the URLs found in the message text, in order of appearance.

        Every ``<...>`` span that contains a dot is treated as a URL. Slack
        abbreviates links as ``<url|link preview>``; since "|" is discouraged
        in URLs, only the part before the first "|" is kept.
        """
        bracketed = re.findall(r"<(.*?)>", self.text)
        # The dot check runs on the whole span (preview included), exactly as
        # before; `split` returns the span unchanged when it has no "|".
        return [span.split("|")[0] for span in bracketed if "." in span]

    @property
    def is_by_human(self):
        """Return True unless the author is the bot (``config["bot_id"]``)."""
        # Fixed: this used to read `self.user.user_id`, but instances only
        # ever carry `user_id`, so every access raised AttributeError.
        return self.user_id != config["bot_id"]

    @property
    def has_single_url(self):
        """Return True when the text contains exactly one URL."""
        return len(self.urls) == 1
|
|
|
|
|
|
|
|
class BotApp(App):
    """Slack Bolt app that hands archive-channel messages to a callback.

    On top of ``App`` it catches up on messages posted since the newest stored
    article (``pre_start``) and forwards every accepted message to the
    ``callback`` given at construction time.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, callback, *args, **kwargs):
        """Create the app.

        Args:
            callback: Callable invoked as ``callback(message=<Message>)`` for
                every handled message.
            *args, **kwargs: Passed through unchanged to ``App.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.callback = callback

    def pre_start(self):
        """Handle messages missed since the last run, then log the DB status."""
        # Plain loop: the calls are made for their side effects only.
        for missed in self.fetch_missed_channel_messages():
            self.handle_incoming_message(missed)
        self.startup_status()

    def say_substitute(self, *args, **kwargs):
        """Post a message to the archive channel.

        Args:
            *args: Text fragments, joined with " - " to form the message.
            **kwargs: Extra arguments for ``chat_postMessage`` (e.g. ``thread_ts``).
        """
        self.client.chat_postMessage(
            channel=config["archive_id"],
            text=" - ".join(args),
            **kwargs
        )

    def _wanted_messages(self, raw_messages):
        """Wrap raw payloads in ``Message``, dropping those it rejects."""
        wanted = []
        for raw in raw_messages:
            try:
                wanted.append(Message(raw))
            except MessageIsUnwanted:
                # Edits, thread replies and non-message payloads are expected
                # in channel history; skip them instead of aborting the fetch.
                continue
        return wanted

    def fetch_missed_channel_messages(self):
        """Fetch archive-channel messages newer than the newest stored article.

        Returns:
            list[Message]: The accepted messages of all fetched pages, in the
            order the API returned them.
        """
        # The article with the highest slack_ts marks the last processed message.
        presaved = models.ArticleDownload.select().order_by(models.ArticleDownload.slack_ts.desc()).get_or_none()
        last_ts = 0 if presaved is None else presaved.slack_ts_full

        result = self.client.conversations_history(
            channel=config["archive_id"],
            oldest=last_ts
        )
        # NOTE(review): earlier (disabled) code popped the last message as a
        # duplicate of the stored one when the db was not empty. Presumably
        # `oldest` is exclusive so this is no longer needed - confirm.
        return_messages = self._wanted_messages(result.get("messages", []))

        refetch = result.get("has_more", False)
        while refetch:  # we have not actually fetched them all
            try:
                result = self.client.conversations_history(
                    channel=config["archive_id"],
                    cursor=result["response_metadata"]["next_cursor"],
                    oldest=last_ts
                )
            except SlackApiError:  # Most likely a rate-limit
                # `result` still holds the previous page, so the next
                # iteration retries with the same cursor.
                # NOTE(review): a persistent API error retries forever.
                self.logger.error("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(config["api_wait_time"]))
                time.sleep(config["api_wait_time"])
                continue

            refetch = result.get("has_more", False)
            return_messages.extend(self._wanted_messages(result.get("messages", [])))

        self.logger.info(f"Fetched {len(return_messages)} new channel messages.")
        return return_messages

    def handle_incoming_message(self, message, say=None):
        """React to a message in the archive channel.

        Called either while catching up on missed messages (by ``pre_start``)
        or by the SocketModeHandler in 'live' mode.

        Args:
            message: A ``Message`` or a raw Slack payload dict.
            say: Unused; accepted so the method fits the Bolt listener signature.

        Returns:
            ``False`` when a raw payload is rejected as unwanted, otherwise ``None``.
        """
        if isinstance(message, dict):
            try:
                message = Message(message)
            except MessageIsUnwanted:
                return False

        urls = message.urls
        self.logger.info(f"Handling message {message} ({len(urls)} urls)")

        if len(urls) > 1:
            # Fixed: this used `message.thread.slack_ts`, which Message does
            # not define; the message's own `ts` threads the notice under it.
            self.say_substitute("Only the first url is being handled. Please send any subsequent url as a separate message", thread_ts=message.ts)

        self.callback(message = message)

    def respond_channel_message(self, article, say=None):
        """Log the outcome for a finished article; no Slack reply is sent.

        Args:
            article: Object exposing ``slack_ts``.
            say: Unused.
        """
        if article.slack_ts == 0:
            self.logger.error(f"{article} has no slack_ts")
        else:
            self.logger.info("Skipping slack reply.")

    def startup_status(self):
        """Log an overview of the articles.

        Called from ``pre_start`` so that it runs after the newly sent
        messages have been fetched and handled.
        """
        articles = models.ArticleDownload
        total = articles.select().count()
        to_be_processed = articles.select().where(articles.title == "").count()
        unchecked = articles.select().where(articles.verified == 0).count()
        bad = articles.select().where(articles.verified == -1).count()
        not_uploaded = articles.select().where(articles.archive_url == "").count()
        self.logger.info(
            f"[bold]NEWS-FETCH DATABASE STATUS[/bold]: Total entries: {total}; Not yet downloaded: {to_be_processed}; Not yet checked: {unchecked}; Not yet uploaded to archive: {not_uploaded}; Marked as bad: {bad}",
            extra={"markup": True}  # presumably consumed by a markup-aware log handler
        )
|
|
|
|
|
|
|
|
|
|
|
|
class BotRunner():
    """Stupid encapsulation so that we can apply the slack decorators to the BotApp"""
    # The string above used to sit after the `logger` assignment, where it was
    # a no-op expression rather than the class docstring.

    logger = logging.getLogger(__name__)

    def __init__(self, callback, *args, **kwargs) -> None:
        """Create the BotApp, register its listeners and the socket-mode handler.

        Args:
            callback: Forwarded to ``BotApp``.
            *args, **kwargs: Accepted but unused.
        """
        self.bot_worker = BotApp(callback, token=config["auth_token"])

        @self.bot_worker.event(event="message", matchers=[is_message_in_archiving])
        def handle_incoming_message(message, say):
            return self.bot_worker.handle_incoming_message(message, say)

        # Disabled reaction handling, kept for reference:
        # @self.bot_worker.event(event="reaction_added", matchers=[is_reaction_in_archiving])
        # def handle_incoming_reaction(event, say):
        #     return self.bot_worker.handle_incoming_reaction(event)

        @self.bot_worker.event(event="event")
        def handle_all_other_reactions(event, say):
            # Fixed: `Logger.log` needs a level as its first argument, so the
            # original one-argument call raised TypeError. DEBUG is chosen here
            # because the original level is unknown.
            self.logger.debug("Ignoring slack event that isn't a message")

        self.handler = SocketModeHandler(self.bot_worker, config["app_token"])

    def start(self):
        """Run the BotApp's ``pre_start`` step, then start the socket-mode handler."""
        self.bot_worker.pre_start()
        self.handler.start()

    def stop(self):
        """Close the socket-mode handler."""
        self.handler.close()
        self.logger.info("Closed Slack-Socketmodehandler")

    def send(self, article):
        """Proxy function to send a message to the slack channel, Called by ArticleWatcher once the Article is ready"""
        self.bot_worker.respond_channel_message(article)
|
|
|
|
|
|
|
|
def is_message_in_archiving(message) -> bool:
    """Return True when the message's channel is the configured archive channel.

    Args:
        message: Slack payload dict with a ``channel`` key.
    """
    channel = message["channel"]
    return channel == config["archive_id"]
|
|
|