few bugs in news_fetch left, news_check wip

This commit is contained in:
Remy Moll 2022-09-06 22:15:26 +02:00
parent 2e65828bbb
commit 713406dc67
15 changed files with 537 additions and 267 deletions

View File

@ -34,7 +34,7 @@ services:
geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers) geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers)
image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated) image: ${GECKODRIVER_IMG}
environment: environment:
- START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash) - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash)
- START_XVFB=${HEADFULL-false} - START_XVFB=${HEADFULL-false}

10
launch
View File

@ -5,10 +5,12 @@ set -o ignoreeof
echo "Bash script launching COSS_ARCHIVING..." echo "Bash script launching COSS_ARCHIVING..."
# CHANGE ME! # CHANGE ME ONCE!
export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving
export UNAME=remy export UNAME=remy
# CHANGE ME WHEN UPDATING FIREFOX
export GECKODRIVER_IMG=selenium/standalone-firefox:103.0
# version must be >= the one on the host, otherwise firefox will not start (because of mismatched config)
if [[ $1 == "debug" ]] if [[ $1 == "debug" ]]
then then
@ -16,8 +18,8 @@ then
export HEADFULL=true export HEADFULL=true
export CODE=./ export CODE=./
export ENTRYPOINT=/bin/bash export ENTRYPOINT=/bin/bash
# since service ports is not enough here, also execute up, which will # since service ports does not open ports on implicitly started containers, also start geckodriver:
docker compose up -d docker compose up -d geckodriver
elif [[ $1 == "production" ]] elif [[ $1 == "production" ]]
then then
export DEBUG=false export DEBUG=false

View File

@ -1,5 +1,4 @@
import sys import sys
from webbrowser import get
sys.path.append("../app") sys.path.append("../app")
import runner import runner
import logging import logging

View File

@ -0,0 +1,170 @@
import datetime
import sys
sys.path.append("../news_fetch/")
import configuration # lives in app
from peewee import *
import os
import time
# Source of the migration: the legacy SQLite file.
old_db = SqliteDatabase("/app/containerdata/downloads.db")
# Target of the migration: a Postgres server reached at host "vpn", port 5432.
# The credentials come from the [DATABASE] section of configuration.db_config.
cred = configuration.db_config["DATABASE"]
download_db = PostgresqlDatabase(
    cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432
)
## OLD Models
class OLDModel(Model):
    """Base class that binds the legacy models to the SQLite database `old_db`."""
    class Meta:
        database = old_db
class OLDArticleDownload(OLDModel):
    """Legacy article row, mapped onto the existing `articledownload` table.

    The class name carries an OLD prefix to avoid clashing with the new model,
    so the table name has to be given explicitly.
    """
    class Meta:
        # `table_name` is the peewee 3 spelling; `db_table` is deprecated
        table_name = 'articledownload'

    title = CharField(default='')
    pub_date = DateField(default = '')
    download_date = DateField(default = 0)
    source_name = CharField(default = '')
    article_url = TextField(default = '', unique=True)
    archive_url = TextField(default = '')
    file_name = TextField(default = '')
    language = CharField(default = '')
    summary = TextField(default = '')
    comment = TextField(default = '')
    verified = IntegerField(default = False)
    # authors
    # keywords
    # ... are added through foreignkeys
class OLDArticleAuthor(OLDModel):
    """Legacy author row (one per author of an article), mapped onto `articleauthor`."""
    class Meta:
        # `table_name` is the peewee 3 spelling; `db_table` is deprecated
        table_name = 'articleauthor'

    # reachable from the article as `article.authors`
    article = ForeignKeyField(OLDArticleDownload, backref='authors')
    author = CharField()
class OLDArticleRelated(OLDModel):
    """Legacy related-file row, mapped onto the existing `articlerelated` table."""
    class Meta:
        # `table_name` is the peewee 3 spelling; `db_table` is deprecated
        table_name = 'articlerelated'

    # reachable from the article as `article.related`
    article = ForeignKeyField(OLDArticleDownload, backref='related')
    related_file_name = TextField(default = '')
## NEW Models
class NEWModel(Model):
    """Base class that binds the new models to the Postgres database `download_db`."""
    class Meta:
        database = download_db
class ArticleDownload(NEWModel):
    """Article row as stored in the new Postgres database.

    Authors and related files live in their own tables and are reachable
    through the `authors` and `related` backrefs.
    """
    # in the beginning this is all we have
    article_url = TextField(default = '', unique=True)
    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default = '')
    source_name = CharField(default = '')
    language = CharField(default = '')
    file_name = TextField(default = '')
    archive_url = TextField(default = '')
    # Date defaults must be real dates: '' and 0 are not valid values for a
    # Postgres date column. These match the defaults of the server-side models.
    pub_date = DateField(default = datetime.date.fromtimestamp(0))
    download_date = DateField(default = datetime.date.today)
    slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
    sent = BooleanField(default = False)
    archived_by = CharField(default = os.getenv("UNAME"))
    # need to know who saved the message because the file needs to be on their computer in order to get verified
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default = '')
    verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad

    def set_authors(self, authors):
        """Create one ArticleAuthor row per name in `authors`.

        Names of 100 characters or more are skipped.
        """
        for a in authors:
            if len(a) < 100:
                ArticleAuthor.create(
                    article = self,
                    author = a
                )

    def set_related(self, related):
        """Create one ArticleRelated row per file name in `related`."""
        for r in related:
            ArticleRelated.create(
                article = self,
                related_file_name = r
            )
    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
class ArticleAuthor(NEWModel):
    """One author of an article; reachable from the article as `article.authors`."""
    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
class ArticleRelated(NEWModel):
    """One related file of an article; reachable from the article as `article.related`."""
    # Related files, such as the full text of a paper, audio files, etc.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default = '')
####################################################################
# Migrate using sensible defaults:
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

# Placeholder stored for articles that have no publication date.
_EPOCH = datetime.date.fromtimestamp(0)

# Run the whole migration in one transaction. If any row fails, nothing is
# committed, so the script can simply be re-run without tripping over the
# unique constraint on article_url.
with download_db.atomic():
    for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
        print("====================================================================")
        print(f"IT {it} New article with data:")
        # Build the values once so that what is printed is exactly what is stored.
        migrated = {
            "article_url": old_art.article_url,
            "title": old_art.title,
            "summary": old_art.summary,
            "source_name": old_art.source_name,
            "language": old_art.language,
            "file_name": old_art.file_name,
            "archive_url": old_art.archive_url,
            # the legacy db stores '' (or nothing at all) for unknown dates
            "pub_date": old_art.pub_date or _EPOCH,
            "download_date": old_art.download_date,
            # slack_ts and archived_by keep their model defaults
            "sent": True,  # everything in the legacy db counts as already sent
            "comment": old_art.comment,
            "verified": old_art.verified,
        }
        print(*migrated.values())
        new_art = ArticleDownload.create(**migrated)
        new_art.set_related([r.related_file_name for r in old_art.related])
        new_art.set_authors([a.author for a in old_art.authors])

View File

@ -2,16 +2,38 @@
import PDFView from './PDFView.svelte'; import PDFView from './PDFView.svelte';
import ArticleStatus from './ArticleStatus.svelte'; import ArticleStatus from './ArticleStatus.svelte';
import ArticleOperations from './ArticleOperations.svelte'; import ArticleOperations from './ArticleOperations.svelte';
let current_id = 0;
// Promise for the data of the article to display.
// Step 1: ask the API which id comes next (the first one when nothing has been shown yet).
// Step 2: remember that id and fetch the full article for it.
const updateInterface = (async () => {
    const id_url = (current_id == 0)
        ? '/api/article/first'
        : '/api/article/' + current_id + '/next';
    const id_reply = await fetch(id_url)
    const id_payload = await id_reply.json()
    current_id = id_payload.id;

    const article_reply = await fetch('/api/article/' + current_id + '/get');
    return await article_reply.json();
})()
</script> </script>
{#await updateInterface}
<div class="flex w-full h-full gap-5 p-5"> ...
<div class="w-3/5"><PDFView/></div> {:then article_data}
<div class="flex w-full h-screen gap-5 p-5">
<div class="w-3/5"><PDFView article_data={article_data}/></div>
<div class="divider divider-horizontal"></div> <div class="divider divider-horizontal"></div>
<div class="w-2/5"> <div class="w-2/5">
<ArticleStatus article_id={42}/> <ArticleStatus article_data={article_data}/>
<div class="divider divider-vertical"></div> <div class="divider divider-vertical"></div>
<ArticleOperations/> <ArticleOperations article_data={article_data}/>
</div> </div>
</div> </div>
{/await}

View File

@ -1,28 +1,93 @@
<div class="toast"> <script>
<div class="alert alert-info"> import {fade} from 'svelte/transition';
<div>
<span>New message arrived.</span>
</div>
</div>
</div>
<div class="grid grid-cols-3 gap-4"> export let article_data;
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
<div class="highlight">01</div>
</div> const actions = [
<style> {name: 'Mark as good (and skip to next)', kbd: 'A'},
.highlight { {name: 'Mark as bad (and skip to next)', kbd: 'B'},
background-color: #f5f5f5; {name: 'Upload related file', kbd: 'R'},
border-radius: 5px; {name: 'Skip', kbd: 'ctrl'},
padding: 10px; ]
margin: 10px;
const toast_states = {
'success' : {class: 'alert-success', text: 'Article updated successfully'},
'error' : {class: 'alert-error', text: 'Article update failed'},
} }
</style> let toast_state = {};
let toast_visible = false;
// Keyboard entry point: forwards the pressed key to apiAction, which ignores keys not bound to an action.
function onKeyDown(e) {apiAction(e.key)}
/**
 * POST the action bound to `key` for the current article and report the
 * outcome in a toast.
 *
 * `key` is compared case-insensitively against the `kbd` entries of
 * `actions`; any other key is ignored. A response status other than 200 and
 * a rejected request (e.g. server unreachable) both show the error toast.
 */
function apiAction(key) {
    const action = key.toLowerCase();
    const known_keys = actions.map(d => d.kbd.toLowerCase());
    if (!known_keys.includes(action)) {
        return; // ignore other keypresses
    }
    fetch('/api/article/' + article_data.id + '/set', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({
            'action': action,
        })
    })
        .then(response => showToast(response.status == 200 ? 'success' : 'error'))
        // without this, a network failure would go unnoticed by the user
        .catch(() => showToast('error'));
}
// Handle of the pending hide-timer, so that a newer toast can cancel it.
let toast_timer;

/**
 * Show the toast described by toast_states[state] for one second.
 * A toast shown while another is still visible restarts the one-second
 * countdown instead of being hidden early by the older timer.
 */
function showToast(state){
    toast_visible = true;
    toast_state = toast_states[state];
    clearTimeout(toast_timer);
    toast_timer = setTimeout(() => {
        toast_visible = false;
    }, 1000)
}
</script>
<div class="card bg-neutral-300 shadow-xl">
<div class="card-body">
<h2 class="card-title">Your options: (click on action or use keyboard)</h2>
<div class="overflow-x-auto">
<table class="table w-full table-compact">
<!-- head -->
<thead>
<tr>
<th>Action</th>
<th>Keyboard shortcut</th>
</tr>
</thead>
<tbody>
{#each actions as action}
<tr>
<td><button on:click={() => apiAction(action.kbd)}>{ action.name }</button></td>
<td><kbd class="kbd">{ action.kbd }</kbd></td>
</tr>
{/each}
</tbody>
</table>
</div>
</div>
</div>
<svelte:window on:keydown|preventDefault={onKeyDown} />
{#if toast_visible}
<div class="toast" transition:fade>
<div class="alert { toast_state.class }">
<div>
<span>{ toast_state.text }.</span>
</div>
</div>
</div>
{/if}

View File

@ -1,25 +1,38 @@
<script> <script>
export let article_id; export let article_data;
const Article = (async () => { const status_items = [
const response = await fetch('/api/article/' + article_id + '/get') {name: 'Title', value: article_data.title},
return await response.json() {name: 'Filename', value: article_data.file_name},
})() {name: 'Language', value: article_data.language},
console.log(Article) {name: 'Authors', value: article_data.authors},
{name: "Related", value: article_data.related},
]
</script> </script>
<div class="mockup-window border bg-base-300"> <div class="card bg-neutral-300 shadow-xl overflow-x-auto">
<h1 class="center">Article overview</h1> <div class="card-body">
<ul tabindex="0" class="menu p-2 shadow bg-base-100 rounded-box w-52"> <h2 class="card-title">Article overview:</h2>
{#await Article} <table class="table w-full table-compact" style="table-layout: fixed">
<li>...waiting</li> <thead>
{:then data} <tr>
<li><a href="#">{data.value}</a></li> <th>Attribute</th>
<li><a href="#">Item 2</a></li> <th>Value</th>
{:catch error} </tr>
<li>An error occurred!</li> </thead>
{/await} <tbody>
{#each status_items as item}
<tr>
</ul> <td>{ item.name }</td>
<!-- <td>Quality Control Specialist</td> -->
{#if item.value != ""}
<td class='bg-emerald-200' style="white-space: normal">{ item.value }</td>
{:else}
<td class='bg-red-200'>{ item.value }</td>
{/if}
</tr>
{/each}
</tbody>
</table>
</div>
</div> </div>

View File

@ -1,64 +1,10 @@
<!--
<script>
var myState = {
pdf: null,
currentPage: 1,
zoom: 1
}
pdfjsLib.getDocument('test.pdf').then((pdf) => {
myState.pdf = pdf;
render();
});
function render() {
myState.pdf.getPage(myState.currentPage).then((page) => {
var canvas = document.getElementById("pdf_renderer");
var ctx = canvas.getContext('2d');
var viewport = page.getViewport(myState.zoom);
canvas.width = viewport.width;
canvas.height = viewport.height;
page.render({
canvasContext: ctx,
viewport: viewport
});
});
}
</script>
-->
<!-- <div id="my_pdf_viewer">
<div class="mockup-window border bg-base-300">
<div id="canvas_container" class="flex justify-center">
<canvas id="pdf_renderer"></canvas>
</div>
</div>
<div id="navigation_controls">
<button id="go_previous">Previous</button>
<input id="current_page" value="1" type="number"/>
<button id="go_next">Next</button>
</div>
<div id="zoom_controls">
<button id="zoom_in">+</button>
<button id="zoom_out">-</button>
</div>
</div> -->
<script> <script>
let pdf_file = 'test.pdf'; export let article_data;
</script> </script>
<div class="mockup-window border bg-base-300 h-full w-full"> <div class="h-full w-full shadow-xl">
<object class="pdf-view" data="{pdf_file}" title="Article PDF"> </object> <object class="pdf-view" data="{article_data.save_path + article_data.file_name}" title="Article PDF"> </object>
</div> </div>
<style> <style>

View File

@ -1,7 +1,7 @@
import json from flask import Flask, send_from_directory, request
from flask import Flask, send_from_directory, jsonify import configuration
import random models = configuration.models
db = configuration.db
app = Flask(__name__) app = Flask(__name__)
@ -9,26 +9,55 @@ app = Flask(__name__)
# SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm) # SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm)
@app.route("/") #index.html @app.route("/") #index.html
def base(): def index():
return send_from_directory('../client/public', 'index.html') return send_from_directory('../client/public', 'index.html')
@app.route("/<path:path>") #js-files @app.route("/<path:path>") #js-files
def home(path): def js(path):
return send_from_directory('../client/public', path) return send_from_directory('../client/public', path)
@app.route("/app/containerdata/files/<path:path>")
def static_pdf(path):
    """Serve the file at `path`, taken relative to /app/containerdata/files/."""
    # NOTE(review): presumably matches the save_path + file_name URL built by the client - confirm
    return send_from_directory('/app/containerdata/files/', path)
############################################################################### ###############################################################################
# API for news_check. # (simple) API for news_check.
@app.route("/api/article/<int:id>/get")
def get_article_by_id(id):
    """Return the article with the given id as JSON (see ArticleDownload.to_dict).

    Responds with 404 and a JSON error when no such article exists.
    """
    with db:
        try:
            article = models.ArticleDownload.get_by_id(id)
        except models.ArticleDownload.DoesNotExist:
            return {"error": f"No article with id {id}"}, 404
        # to_dict queries the authors/related backrefs, so keep it inside the db context
        return article.to_dict()
@app.route("/api/article/first")
def get_article_first():
    """Return {"id": ...} for the unverified article with the lowest id.

    Responds with 404 and a JSON error when every article has been verified.
    """
    with db:
        article = (
            models.ArticleDownload
            .select(models.ArticleDownload.id)
            .where(models.ArticleDownload.verified == 0)
            .order_by(models.ArticleDownload.id)
            .first()
        )
    if article is None:  # .first() yields None on an empty result
        return {"error": "No unverified article left"}, 404
    return {"id" : article.id}
@app.route("/api/article/<int:id>/next")
def get_article_next(id):
    """Return {"id": ...} for the next unverified article after `id`.

    Looks for the unverified article with the smallest id greater than `id`,
    so gaps in the id sequence and already verified neighbours are skipped.
    When there is none, falls back to get_article_first().
    """
    Article = models.ArticleDownload
    with db:
        following = (
            Article
            .select(Article.id)
            .where((Article.verified == 0) & (Article.id > id))
            .order_by(Article.id)
            .first()
        )
    if following is not None:
        return {"id" : following.id}
    return get_article_first()
@app.route("/api/article/<int:id>/set", methods=['POST'])
def set_article(id):
    """Apply a reviewer action to the article with the given id.

    Expects a JSON body with an "action" key:
      "a" marks the article as verified (1),
      "b" marks it as bad (-1),
      "r" attaches the file names listed under the optional "related" key.
    Any other action leaves the article unchanged.

    Returns "ok" on success, 400 when the action is missing and 404 when the
    article does not exist.
    """
    payload = request.get_json(silent=True) or {}
    action = payload.get('action')
    if action is None:
        return {"error": "Missing 'action' in request body"}, 400
    with db:
        try:
            article = models.ArticleDownload.get_by_id(id)
        except models.ArticleDownload.DoesNotExist:
            return {"error": f"No article with id {id}"}, 404
        if action == "a":
            article.verified = 1
        elif action == "b":
            article.verified = -1
        elif action == "r":
            # set_related requires the list of file names; with no "related"
            # key in the body this is a no-op instead of a TypeError
            article.set_related(payload.get('related', []))
        article.save()
    return "ok"

View File

@ -0,0 +1,16 @@
from peewee import PostgresqlDatabase
import configparser
# General settings.
# NOTE: ConfigParser.read() silently skips files it cannot find, so a missing
# ini file only surfaces later as a KeyError on the first section lookup.
main_config = configparser.ConfigParser()
main_config.read("/app/containerdata/config/news_fetch.config.ini")
# Database credentials are kept in their own file.
db_config = configparser.ConfigParser()
db_config.read("/app/containerdata/config/db.config.ini")
cred = db_config["DATABASE"]
# Connection to the Postgres server, reached at host "vpn", port 5432.
db = PostgresqlDatabase(
    cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432
)
# Imported last on purpose: the models module itself imports `configuration`, so
# everything it needs from here has to be defined before this line runs.
import models
# Bind the models to the database object created above.
models.set_db(db)

134
news_check/server/models.py Normal file
View File

@ -0,0 +1,134 @@
import logging
logger = logging.getLogger(__name__)
from peewee import *
import os
import datetime
import configuration
config = configuration.main_config["DOWNLOADS"]
# set the nature of the db at runtime
download_db = DatabaseProxy()
class DownloadBaseModel(Model):
    """Base class that binds all models to the `download_db` proxy, which set_db() initializes."""
    class Meta:
        database = download_db
## == Article related models == ##
class ArticleDownload(DownloadBaseModel):
    """An article handled by the archiving pipeline, including its verification state.

    Authors and related files live in their own tables and are reachable
    through the `authors` and `related` backrefs.
    """
    # in the beginning this is all we have
    article_url = TextField(default = '', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default = '')
    source_name = CharField(default = '')
    language = CharField(default = '')

    file_name = TextField(default = '')

    @property
    def save_path(self):
        """Local directory of the article file: <local_storage_path>/<year>/<month name>/ (with trailing slash)."""
        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    @property
    def fname_nas(self, file_name=""):
        """Display string locating the article file on the NAS, or None without a download date."""
        if not self.download_date:
            return None
        # A property getter is only ever invoked with `self`, so in practice
        # this always resolves to self.file_name.
        name = file_name or self.file_name
        return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{name}"

    archive_url = TextField(default = '')
    pub_date = DateField(default = datetime.date.fromtimestamp(0))
    download_date = DateField(default = datetime.date.today)

    slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
    sent = BooleanField(default = False)

    archived_by = CharField(default = os.getenv("UNAME"))
    # need to know who saved the message because the file needs to be on their computer in order to get verified
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default = '')
    verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db

    def to_dict(self):
        """Return a JSON-serializable dict of the article.

        Dates are formatted as YYYY-MM-DD; `related` and `authors` are flat
        lists of strings read from the backrefs.
        """
        return {
            "id": self.id,
            "article_url": self.article_url,
            "title": self.title,
            "summary": self.summary,
            "source_name": self.source_name,
            "language": self.language,
            "file_name": self.file_name,
            "save_path": self.save_path,
            "fname_nas": self.fname_nas,
            "archive_url": self.archive_url,
            "pub_date": self.pub_date.strftime("%Y-%m-%d"),
            "download_date": self.download_date.strftime("%Y-%m-%d"),
            "sent": self.sent,
            "comment": self.comment,
            "related": [r.related_file_name for r in self.related],
            "authors": [a.author for a in self.authors]
        }

    def set_related(self, related):
        """Create one ArticleRelated row per file name in `related`.

        Raises:
            ValueError: if any name is longer than 255 characters. All names
                are checked before the first row is written, so a rejected
                call leaves no partial data behind.
        """
        related = list(related)  # may be a one-shot iterable; it is walked twice
        if any(len(r) > 255 for r in related):
            raise ValueError("Related file name too long for POSTGRES")
        for r in related:
            ArticleRelated.create(
                article = self,
                related_file_name = r
            )

    def file_status(self):
        """Check that the article file exists on disk.

        Returns:
            (True, {}) when the file exists at save_path + file_name, else
            (False, info) where info holds a "reply_text" and a None "file_path".
        """
        if not self.file_name:
            logger.error(f"Article {self} has no filename!")
            return False, {"reply_text": "Download failed, no file was saved.", "file_path": None}

        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
            return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None}

        return True, {}
class ArticleAuthor(DownloadBaseModel):
    """One author of an article; reachable from the article as `article.authors`."""
    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
class ArticleRelated(DownloadBaseModel):
    """One related file of an article; reachable from the article as `article.related`."""
    # Related files, such as the full text of a paper, audio files, etc.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default = '')
def set_db(download_db_object):
    """Point the `download_db` proxy at `download_db_object` and create the model tables."""
    download_db.initialize(download_db_object)
    with download_db: # create tables (does nothing if they exist already)
        download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

View File

@ -1,20 +0,0 @@
import peewee
db = peewee.PostgresqlDatabase('coss_archiving', user='ca_rw', password='pleasechangeit', host='vpn', port=5432)
# db.connect()
class Pet(peewee.Model):
name = peewee.CharField()
animal_type = peewee.CharField()
class Meta:
database = db # this model uses the "people.db" database
with db:
db.create_tables([Pet])
db.get_tables()
t = Pet.create(name="Test", animal_type="test")
for pet in Pet.select():
print(pet.name)

View File

@ -8,3 +8,4 @@ newspaper3k
htmldate htmldate
markdown markdown
rich rich
psycopg2

View File

@ -123,7 +123,6 @@ class Coordinator(Thread):
unsent = models.ArticleDownload.filter(sent = False) unsent = models.ArticleDownload.filter(sent = False)
# .objects.filter(sent = False) # .objects.filter(sent = False)
for a in unsent: for a in unsent:
print(a)
self.incoming_request(article=a) self.incoming_request(article=a)
@ -170,7 +169,7 @@ class Coordinator(Thread):
for article in articles: for article in articles:
notifier = lambda article: logger.info(f"Completed manual actions for {article}") notifier = lambda article: logger.info(f"Completed manual actions for {article}")
ArticleWatcher(article, None, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg ArticleWatcher(article, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg
def article_complete_notifier(self, article): def article_complete_notifier(self, article):
if self.worker_slack is None: if self.worker_slack is None:
@ -192,7 +191,7 @@ if __name__ == "__main__":
if "upload" in sys.argv: if "upload" in sys.argv:
articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute() articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "" or models.ArticleDownload.archive_url == "TODO:UPLOAD").execute()
logger.info(f"Launching upload to archive for {len(articles)} articles.") logger.info(f"Launching upload to archive for {len(articles)} articles.")
coordinator.manual_processing(articles, [UploadWorker()]) coordinator.manual_processing(articles, [UploadWorker()])

View File

@ -4,7 +4,6 @@ logger = logging.getLogger(__name__)
from peewee import * from peewee import *
import os import os
import markdown import markdown
import re
import configuration import configuration
import datetime import datetime
@ -28,7 +27,7 @@ class ArticleDownload(DownloadBaseModel):
article_url = TextField(default = '', unique=True) article_url = TextField(default = '', unique=True)
# fetch then fills in the metadata # fetch then fills in the metadata
title = CharField(default='') title = TextField(default='')
@property @property
def is_title_bad(self): # add incrementally def is_title_bad(self): # add incrementally
return "PUR-Abo" in self.title \ return "PUR-Abo" in self.title \
@ -63,7 +62,7 @@ class ArticleDownload(DownloadBaseModel):
archive_url = TextField(default = '') archive_url = TextField(default = '')
pub_date = DateField(default = '') pub_date = DateField(default = datetime.date.fromtimestamp(0))
download_date = DateField(default = datetime.date.today) download_date = DateField(default = datetime.date.today)
slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
@ -143,6 +142,7 @@ class ArticleDownload(DownloadBaseModel):
def set_authors(self, authors): def set_authors(self, authors):
for a in authors: for a in authors:
if len(a) < 100: # otherwise it's a mismatched string
ArticleAuthor.create( ArticleAuthor.create(
article = self, article = self,
author = a author = a
@ -150,6 +150,9 @@ class ArticleDownload(DownloadBaseModel):
def set_related(self, related): def set_related(self, related):
for r in related: for r in related:
if len(r) > 255:
raise Exception("Related file name too long for POSTGRES")
ArticleRelated.create( ArticleRelated.create(
article = self, article = self,
related_file_name = r related_file_name = r
@ -182,116 +185,7 @@ class ArticleRelated(DownloadBaseModel):
# class Thread(ChatBaseModel):
# """The threads that concern us are only created if the base massage contains a url"""
# thread_ts = FloatField(default = 0)
# article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None)
# # provides, ts, user, models
# # messages
# @property
# def slack_ts(self):
# str_ts = str(self.thread_ts)
# cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there a 6 decimals. If there are less, problem!
# return "{}{}".format(str_ts, cut_zeros*"0")
# @property
# def initiator_message(self):
# try:
# return self.messages[0] # TODO check if this needs sorting
# except IndexError:
# logger.warning(f"Thread {self} is empty. How can that be?")
# return None
# @property
# def message_count(self):
# # logger.warning("message_count was called")
# return self.messages.count()
# @property
# def last_message(self):
# messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation
# return messages[-1]
# @property
# def is_fully_processed(self) -> bool:
# init_message = self.initiator_message
# if init_message is None:
# return False
# if init_message.is_processed_override:
# return True
# # this override is set for instance, when no url was sent at all. Then set this thread to be ignored
# reactions = init_message.reaction
# if not reactions:
# return False
# else:
# r = reactions[0].type # can and should only have one reaction
# return r == "white_check_mark" \
# or r == "x"
# class Message(ChatBaseModel):
# ts = FloatField(unique=True) #for sorting
# channel_id = CharField(default='')
# user = ForeignKeyField(User, backref="messages")
# text = TextField(default='')
# thread = ForeignKeyField(Thread, backref="messages", default=None)
# file_type = CharField(default='')
# perma_link = CharField(default='')
# is_processed_override = BooleanField(default=False)
# # reaction
# def __str__(self) -> str:
# return "MSG [{}]".format(shorten_name(self.text).replace('\n','/'))
# @property
# def slack_ts(self):
# str_ts = str(self.ts)
# cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there a 6 decimals. If there are less, problem!
# return "{}{}".format(str_ts, cut_zeros * "0")
# @property
# def urls(self):
# pattern = r"<(.*?)>"
# matches = re.findall(pattern, self.text)
# matches = [m for m in matches if "." in m]
# new_matches = []
# for m in matches:
# if "." in m: # must contain a tld, right?
# # further complication: slack automatically abreviates urls in the format:
# # <url|link preview>. Lucky for us, "|" is a character derecommended in urls, meaning we can "safely" split for it and retain the first half
# if "|" in m:
# keep = m.split("|")[0]
# else:
# keep = m
# new_matches.append(keep)
# return new_matches
# @property
# def is_by_human(self):
# return self.user.user_id != slack_config["bot_id"]
# @property
# def has_single_url(self):
# return len(self.urls) == 1
def set_db(download_db_object): def set_db(download_db_object):
download_db.initialize(download_db_object) download_db.initialize(download_db_object)
with download_db: # create tables (does nothing if they exist already) with download_db: # create tables (does nothing if they exist already)
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])