diff --git a/docker-compose.yaml b/docker-compose.yaml index 9c91dcc..75e0d8a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -34,7 +34,7 @@ services: geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers) - image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated) + image: ${GECKODRIVER_IMG} environment: - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash) - START_XVFB=${HEADFULL-false} diff --git a/launch b/launch index c50d61c..4160c49 100644 --- a/launch +++ b/launch @@ -5,10 +5,12 @@ set -o ignoreeof echo "Bash script launching COSS_ARCHIVING..." -# CHANGE ME! +# CHANGE ME ONCE! export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving export UNAME=remy - +# CHANGE ME WHEN UPDATING FIREFOX +export GECKODRIVER_IMG=selenium/standalone-firefox:103.0 +# version must be >= the one on the host, or firefox will not start (because of mismatched config) if [[ $1 == "debug" ]] then @@ -16,8 +18,8 @@ then export HEADFULL=true export CODE=./ export ENTRYPOINT=/bin/bash - # since service ports is not enough here, also execute up, which will - docker compose up -d + # since service ports does not open ports on implicitly started containers, also start geckodriver: + docker compose up -d geckodriver elif [[ $1 == "production" ]] then export DEBUG=false diff --git a/misc/gather_media_files.py b/misc/gather_media_files.py index d60a4b8..f950243 100644 --- a/misc/gather_media_files.py +++ b/misc/gather_media_files.py @@ -1,5 +1,4 @@ import sys -from webbrowser import get sys.path.append("../app") import runner import logging diff --git a/misc/migration.to_postgres.py b/misc/migration.to_postgres.py new file mode 100644 index 0000000..7de1363 --- /dev/null +++ b/misc/migration.to_postgres.py @@ -0,0 +1,170 @@ +import datetime 
+import sys
+sys.path.append("../news_fetch/")
+import configuration # lives in ../news_fetch/ (added to sys.path above)
+from peewee import *
+
+import os
+import time
+
+# source: the legacy SQLite file
+old_db = SqliteDatabase("/app/containerdata/downloads.db")
+
+# target: Postgres, credentials taken from configuration.db_config
+cred = configuration.db_config["DATABASE"]
+download_db = PostgresqlDatabase(
+    cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432
+)
+
+## OLD models: legacy SQLite schema, only read from
+class OLDModel(Model):
+    class Meta:
+        database = old_db
+
+
+# legacy article table (its date columns default to '' / 0 instead of a real date)
+class OLDArticleDownload(OLDModel):
+    class Meta:
+        db_table = 'articledownload'
+
+    title = CharField(default='')
+    pub_date = DateField(default = '')
+    download_date = DateField(default = 0)
+    source_name = CharField(default = '')
+    article_url = TextField(default = '', unique=True)
+    archive_url = TextField(default = '')
+    file_name = TextField(default = '')
+    language = CharField(default = '')
+    summary = TextField(default = '')
+    comment = TextField(default = '')
+    verified = IntegerField(default = False)
+    # authors
+    # keywords
+    # ... are added through foreignkeys
+
+
+# one row per author of an article (reachable as OLDArticleDownload.authors)
+class OLDArticleAuthor(OLDModel):
+    class Meta:
+        db_table = 'articleauthor'
+
+    article = ForeignKeyField(OLDArticleDownload, backref='authors')
+    author = CharField()
+
+
+# one row per related file of an article (reachable as OLDArticleDownload.related)
+class OLDArticleRelated(OLDModel):
+    class Meta:
+        db_table = 'articlerelated'
+
+    article = ForeignKeyField(OLDArticleDownload, backref='related')
+    related_file_name = TextField(default = '')
+
+
+## NEW models: Postgres schema, written to
+class NEWModel(Model):
+    class Meta:
+        database = download_db
+
+
+# the legacy article columns plus slack_ts, sent and archived_by
+class ArticleDownload(NEWModel):
+    # in the beginning this is all we have
+    article_url = TextField(default = '', unique=True)
+    # fetch then fills in the metadata
+    title = TextField(default='')
+    summary = TextField(default = '')
+    source_name = CharField(default = '')
+    language = CharField(default = '')
+    file_name = TextField(default = '')
+    archive_url = TextField(default = '')
+    pub_date = DateField(default = '') # NOTE(review): '' is not a valid Postgres DATE; the migration below always passes a real date
+    download_date = DateField(default = 0) # NOTE(review): same for 0
+    slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
+    sent = BooleanField(default = False)
+    archived_by = CharField(default = os.getenv("UNAME")) # evaluated once at import time; None when UNAME is not set
+    # need to know who saved the message because the file needs to be on their computer in order to get verified
+    # verification happens in a different app, but the model has the fields here as well
+    comment = TextField(default = '')
+    verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad
+
+    def set_authors(self, authors):
+        """Create one ArticleAuthor row per name in `authors`; names of 100+ characters are skipped."""
+        for a in authors:
+            if len(a) < 100:
+                ArticleAuthor.create(
+                    article = self,
+                    author = a
+                )
+
+    def set_related(self, related):
+        """Create one ArticleRelated row per file name in `related`."""
+        for r in related:
+            ArticleRelated.create(
+                article = self,
+                related_file_name = r
+            )
+
+    # authors
+    # keywords
+    # ... are added through foreignkeys
+    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
+
+
+# one row per author of an article (reachable as ArticleDownload.authors)
+class ArticleAuthor(NEWModel):
+    article = ForeignKeyField(ArticleDownload, backref='authors')
+    author = CharField()
+
+
+class ArticleRelated(NEWModel):
+    # Related files, such as the full text of a paper, audio files, etc.
+    article = ForeignKeyField(ArticleDownload, backref='related')
+    related_file_name = TextField(default = '')
+
+
+####################################################################
+# Migrate using sensible defaults:
+_EPOCH = datetime.date(1970, 1, 1) # stand-in for missing dates; date.fromtimestamp(0) would depend on the local timezone
+
+
+def _date_or_epoch(value):
+    """Return `value` unchanged unless it is one of the "no date" placeholders.
+
+    The legacy models default their date columns to '' and 0, neither of which
+    a Postgres DATE column accepts; those (and None) become _EPOCH.
+    """
+    return _EPOCH if value in ("", 0, None) else value
+
+
+download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])
+
+# One transaction for the whole copy: if any row fails, everything is rolled
+# back and the script can simply be re-run (otherwise the rows copied so far
+# would collide with the unique article_url on the next attempt).
+with download_db.atomic():
+    for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
+        new_fields = dict(
+            article_url = old_art.article_url,
+            title = old_art.title,
+            summary = old_art.summary,
+            source_name = old_art.source_name,
+            language = old_art.language,
+            file_name = old_art.file_name,
+            archive_url = old_art.archive_url,
+            pub_date = _date_or_epoch(old_art.pub_date),
+            download_date = _date_or_epoch(old_art.download_date),
+            # slack_ts keeps its model default (0)
+            sent = True,
+            # archived_by keeps its model default (os.getenv("UNAME"))
+            comment = old_art.comment,
+            verified = old_art.verified,
+        )
+        # log the row exactly as it is about to be written
+        print("====================================================================")
+        print(f"IT {it} New article with data:")
+        print(*new_fields.values())
+        new_art = ArticleDownload.create(**new_fields)
+
+        new_art.set_related([r.related_file_name for r in old_art.related])
+        new_art.set_authors([a.author for a in old_art.authors])
\ No newline at end of file
diff --git a/news_check/client/src/App.svelte b/news_check/client/src/App.svelte index e70395c..33da745 100644 --- a/news_check/client/src/App.svelte +++ b/news_check/client/src/App.svelte @@ -2,16 +2,38 @@ import PDFView from './PDFView.svelte'; import ArticleStatus from './ArticleStatus.svelte'; import ArticleOperations from './ArticleOperations.svelte'; + + let current_id = 0; + + const updateInterface = (async () => { + let url = ''; + if (current_id 
== 0) { + url = '/api/article/first'; + } else { + url = '/api/article/' + current_id + '/next'; + } + const response = await fetch(url) + const data = await response.json() + current_id = data.id; + let article_url = '/api/article/' + current_id + '/get'; + const article_response = await fetch(article_url); + const article_data = await article_response.json(); + return article_data; + })() + + - -
-
-
-
- -
- +{#await updateInterface} +... +{:then article_data} +
+
+
+
+ +
+ +
-
- +{/await} diff --git a/news_check/client/src/ArticleOperations.svelte b/news_check/client/src/ArticleOperations.svelte index b7a65ec..e1db46e 100644 --- a/news_check/client/src/ArticleOperations.svelte +++ b/news_check/client/src/ArticleOperations.svelte @@ -1,28 +1,93 @@ -
-
-
- New message arrived. -
+ + + +
+
+

Your options: (click on action or use keyboard)

+
+ + + + + + + + + + {#each actions as action} + + + + + + + {/each} + +
ActionKeyboard shortcut
{ action.kbd }
+
-
-
01
-
01
-
01
-
01
-
01
-
01
-
01
-
01
-
01
+ +{#if toast_visible} +
+
+
+ { toast_state.text }. +
+
- \ No newline at end of file +{/if} \ No newline at end of file diff --git a/news_check/client/src/ArticleStatus.svelte b/news_check/client/src/ArticleStatus.svelte index b26a44c..3e3dd07 100644 --- a/news_check/client/src/ArticleStatus.svelte +++ b/news_check/client/src/ArticleStatus.svelte @@ -1,25 +1,38 @@ -
-

Article overview

- +
+
+

Article overview:

+ + + + + + + + + {#each status_items as item} + + + + {#if item.value != ""} + + {:else} + + {/if} + + {/each} + +
AttributeValue
{ item.name }{ item.value }{ item.value }
+
\ No newline at end of file diff --git a/news_check/client/src/PDFView.svelte b/news_check/client/src/PDFView.svelte index c8fda36..9c3870f 100644 --- a/news_check/client/src/PDFView.svelte +++ b/news_check/client/src/PDFView.svelte @@ -1,64 +1,10 @@ - - -
- +
+