diff --git a/docker-compose.yaml b/docker-compose.yaml
index 9c91dcc..75e0d8a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -34,7 +34,7 @@ services:
geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers)
- image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated)
+ image: ${GECKODRIVER_IMG}
environment:
- START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash)
- START_XVFB=${HEADFULL-false}
diff --git a/launch b/launch
index c50d61c..4160c49 100644
--- a/launch
+++ b/launch
@@ -5,10 +5,12 @@ set -o ignoreeof
echo "Bash script launching COSS_ARCHIVING..."
-# CHANGE ME!
+# CHANGE ME ONCE!
export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving
export UNAME=remy
-
+# CHANGE ME WHEN UPDATING FIREFOX
+export GECKODRIVER_IMG=selenium/standalone-firefox:103.0
+# version must be >= the one on the host, or firefox will not start (because of mismatched config)
if [[ $1 == "debug" ]]
then
@@ -16,8 +18,8 @@ then
export HEADFULL=true
export CODE=./
export ENTRYPOINT=/bin/bash
- # since service ports is not enough here, also execute up, which will
- docker compose up -d
+ # since service ports does not open ports on implicitly started containers, also start geckodriver:
+ docker compose up -d geckodriver
elif [[ $1 == "production" ]]
then
export DEBUG=false
diff --git a/misc/gather_media_files.py b/misc/gather_media_files.py
index d60a4b8..f950243 100644
--- a/misc/gather_media_files.py
+++ b/misc/gather_media_files.py
@@ -1,5 +1,4 @@
import sys
-from webbrowser import get
sys.path.append("../app")
import runner
import logging
diff --git a/misc/migration.to_postgres.py b/misc/migration.to_postgres.py
new file mode 100644
index 0000000..7de1363
--- /dev/null
+++ b/misc/migration.to_postgres.py
@@ -0,0 +1,170 @@
+import datetime
+import sys
+sys.path.append("../news_fetch/")
+import configuration # lives in app
+from peewee import *
+
+import os
+import time
+
+old_db = SqliteDatabase("/app/containerdata/downloads.db")
+
+cred = configuration.db_config["DATABASE"]
+download_db = PostgresqlDatabase(
+ cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432
+)
+
+## OLD Models (legacy SQLite schema; this script only reads from them)
+class OLDModel(Model):  # base class: binds every legacy model to old_db
+    class Meta:
+        database = old_db
+
+
+class OLDArticleDownload(OLDModel):
+    """One article as stored in the legacy 'articledownload' table."""
+    class Meta:
+        db_table = 'articledownload'  # explicit name of the existing legacy table
+
+    title = CharField(default='')
+    pub_date = DateField(default = '')  # the migration loop treats '' as "no publication date"
+    download_date = DateField(default = 0)
+    source_name = CharField(default = '')
+    article_url = TextField(default = '', unique=True)
+    archive_url = TextField(default = '')
+    file_name = TextField(default = '')
+    language = CharField(default = '')
+    summary = TextField(default = '')
+    comment = TextField(default = '')
+    verified = IntegerField(default = False)
+    # authors
+    # keywords
+    # ... are added through foreign keys (backrefs 'authors' and 'related')
+
+
+
+class OLDArticleAuthor(OLDModel):
+    """One author name attached to a legacy article (table 'articleauthor')."""
+    class Meta:
+        db_table = 'articleauthor'
+
+    article = ForeignKeyField(OLDArticleDownload, backref='authors')
+    author = CharField()
+
+
+class OLDArticleRelated(OLDModel):
+    """One related file name attached to a legacy article (table 'articlerelated')."""
+    class Meta:
+        db_table = 'articlerelated'
+
+    article = ForeignKeyField(OLDArticleDownload, backref='related')
+    related_file_name = TextField(default = '')
+
+
+
+## NEW Models (Postgres schema that the migration writes to)
+class NEWModel(Model):  # base class: binds every new model to download_db
+    class Meta:
+        database = download_db
+
+
+class ArticleDownload(NEWModel):
+    """Article row in the new Postgres schema: the legacy columns plus slack_ts, sent and archived_by."""
+    # in the beginning this is all we have
+    article_url = TextField(default = '', unique=True)
+    # fetch then fills in the metadata
+    title = TextField(default='')
+    summary = TextField(default = '')
+    source_name = CharField(default = '')
+    language = CharField(default = '')
+    file_name = TextField(default = '')
+    archive_url = TextField(default = '')
+    pub_date = DateField(default = '')  # NOTE(review): '' is not a date value; confirm this default is never used with Postgres
+    download_date = DateField(default = 0)  # NOTE(review): same concern for 0
+    slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by
+    sent = BooleanField(default = False)
+    archived_by = CharField(default = os.getenv("UNAME"))  # read from the environment once, when the class is defined
+    # need to know who saved the message because the file needs to be on their computer in order to get verified
+    # verification happens in a different app, but the model has the fields here as well
+    comment = TextField(default = '')
+    verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad
+
+    def set_authors(self, authors):
+        """Create an ArticleAuthor row for each name in `authors`; names of 100 or more characters are skipped."""
+        for a in authors:
+            if len(a) < 100:
+                ArticleAuthor.create(
+                    article = self,
+                    author = a
+                )
+
+    def set_related(self, related):
+        """Create an ArticleRelated row for each file name in `related`."""
+        for r in related:
+            ArticleRelated.create(
+                article = self,
+                related_file_name = r
+            )
+
+    # authors, keywords, ... are added through foreign keys (reachable via the 'authors' and 'related' backrefs)
+    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
+
+
+class ArticleAuthor(NEWModel):  # one row per author name of an article
+    article = ForeignKeyField(ArticleDownload, backref='authors')
+    author = CharField()
+
+
+class ArticleRelated(NEWModel):
+    # Related files, such as the full text of a paper, audio files, etc. One row per related file name.
+    article = ForeignKeyField(ArticleDownload, backref='related')
+    related_file_name = TextField(default = '')
+
+
+
+
+
+
+####################################################################
+# Migrate using sensible defaults:
+download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])
+
+it = 0
+for old_art in OLDArticleDownload.select():
+ print("====================================================================")
+ it+=1
+ print(f"IT {it} New article with data:")
+ print(
+ old_art.article_url,
+ old_art.title,
+ old_art.summary,
+ old_art.source_name,
+ old_art.language,
+ old_art.file_name,
+ old_art.archive_url,
+ old_art.pub_date if old_art.pub_date != "" else datetime.date.fromtimestamp(0),
+ old_art.download_date,
+ True,
+ old_art.comment,
+ old_art.verified
+ )
+ new_art = ArticleDownload.create(
+ article_url = old_art.article_url,
+ title = old_art.title,
+ summary = old_art.summary,
+ source_name = old_art.source_name,
+ language = old_art.language,
+ file_name = old_art.file_name,
+ archive_url = old_art.archive_url,
+ pub_date = old_art.pub_date if old_art.pub_date != "" else datetime.date.fromtimestamp(0),
+ download_date = old_art.download_date,
+ # slack_ts = FloatField(default = 0)
+ sent = True,
+ # archived_by = CharField(default = os.getenv("UNAME"))
+ comment = old_art.comment,
+ verified = old_art.verified
+ )
+
+
+ new_art.set_related([r.related_file_name for r in old_art.related])
+ new_art.set_authors([a.author for a in old_art.authors])
+
\ No newline at end of file
diff --git a/news_check/client/src/App.svelte b/news_check/client/src/App.svelte
index e70395c..33da745 100644
--- a/news_check/client/src/App.svelte
+++ b/news_check/client/src/App.svelte
@@ -2,16 +2,38 @@
import PDFView from './PDFView.svelte';
import ArticleStatus from './ArticleStatus.svelte';
import ArticleOperations from './ArticleOperations.svelte';
+
+ let current_id = 0;
+
+    const updateInterface = (async () => { // invoked immediately below, so updateInterface holds a promise
+        let url = '';
+        if (current_id == 0) { // 0 is the initial value: no article has been loaded yet
+            url = '/api/article/first';
+        } else {
+            url = '/api/article/' + current_id + '/next';
+        }
+        const response = await fetch(url)
+        const data = await response.json()
+        current_id = data.id; // remember which article is now shown
+        let article_url = '/api/article/' + current_id + '/get';
+        const article_response = await fetch(article_url); // second request: the article's data
+        const article_data = await article_response.json();
+        return article_data; // resolved value of the promise awaited by the {#await} block
+    })()
+
+
-
-
-
-
-
-
-
-
+{#await updateInterface}
+...
+{:then article_data}
+
-
-
+{/await}
diff --git a/news_check/client/src/ArticleOperations.svelte b/news_check/client/src/ArticleOperations.svelte
index b7a65ec..e1db46e 100644
--- a/news_check/client/src/ArticleOperations.svelte
+++ b/news_check/client/src/ArticleOperations.svelte
@@ -1,28 +1,93 @@
-
-
-
- New message arrived.
-
+
+
+
+
+
+
Your options: (click on action or use keyboard)
+
+
+
+
+
+ Action
+ Keyboard shortcut
+
+
+
+ {#each actions as action}
+
+
+ apiAction(action.kbd)}>{ action.name }
+ { action.kbd }
+
+
+ {/each}
+
+
+
-
-
01
-
01
-
01
-
01
-
01
-
01
-
01
-
01
-
01
+
+{#if toast_visible}
+
+
+
+ { toast_state.text }.
+
+
-
\ No newline at end of file
+{/if}
\ No newline at end of file
diff --git a/news_check/client/src/ArticleStatus.svelte b/news_check/client/src/ArticleStatus.svelte
index b26a44c..3e3dd07 100644
--- a/news_check/client/src/ArticleStatus.svelte
+++ b/news_check/client/src/ArticleStatus.svelte
@@ -1,25 +1,38 @@
-
-
Article overview
-
- {#await Article}
- ...waiting
- {:then data}
- {data.value}
- Item 2
- {:catch error}
- An error occurred!
- {/await}
-
-
-
+
+
+
Article overview:
+
+
+
+ Attribute
+ Value
+
+
+
+ {#each status_items as item}
+
+ { item.name }
+
+ {#if item.value != ""}
+ { item.value }
+ {:else}
+ { item.value }
+ {/if}
+
+ {/each}
+
+
+
\ No newline at end of file
diff --git a/news_check/client/src/PDFView.svelte b/news_check/client/src/PDFView.svelte
index c8fda36..9c3870f 100644
--- a/news_check/client/src/PDFView.svelte
+++ b/news_check/client/src/PDFView.svelte
@@ -1,64 +1,10 @@
-
-
-