# Listing metadata: 170 lines, 5.0 KiB, Python
import datetime
|
|
import sys
|
|
sys.path.append("../news_fetch/")
|
|
import configuration # lives in app
|
|
from peewee import *
|
|
|
|
import os
|
|
import time
|
|
|
|
# Legacy database: a SQLite file at a fixed path.
old_db = SqliteDatabase("/app/containerdata/downloads.db")

# New database: PostgreSQL, with name and credentials taken from the
# "DATABASE" section of the configuration module.
cred = configuration.db_config["DATABASE"]
download_db = PostgresqlDatabase(
    cred["db_name"],
    user=cred["user_name"],
    password=cred["password"],
    host="vpn",
    port=5432,
)
|
|
|
|
## OLD Models
|
|
class OLDModel(Model):
    """Common base for the legacy models; binds them to ``old_db``."""

    class Meta:
        database = old_db
|
|
|
|
|
|
class OLDArticleDownload(OLDModel):
    """Article row of the legacy ``articledownload`` table.

    The table name is set explicitly because the class name carries an ``OLD``
    prefix that the stored table does not have.
    """

    class Meta:
        # peewee 3 names this option ``table_name``; the former ``db_table``
        # spelling is deprecated and triggers a deprecation warning.
        table_name = 'articledownload'

    title = CharField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)
    source_name = CharField(default='')
    article_url = TextField(default='', unique=True)
    archive_url = TextField(default='')
    file_name = TextField(default='')
    language = CharField(default='')
    summary = TextField(default='')
    comment = TextField(default='')
    verified = IntegerField(default=False)
    # authors
    # keywords
    # ... are added through foreign keys
|
|
|
|
|
|
|
|
|
|
class OLDArticleAuthor(OLDModel):
    """Author row of the legacy ``articleauthor`` table.

    Each row links one author name to an article; the rows of an article are
    reachable through its ``authors`` back-reference.
    """

    class Meta:
        # peewee 3 names this option ``table_name``; the former ``db_table``
        # spelling is deprecated and triggers a deprecation warning.
        table_name = 'articleauthor'

    article = ForeignKeyField(OLDArticleDownload, backref='authors')
    author = CharField()
|
|
|
|
|
|
|
|
class OLDArticleRelated(OLDModel):
    """Related-file row of the legacy ``articlerelated`` table.

    Each row links one related file name to an article; the rows of an article
    are reachable through its ``related`` back-reference.
    """

    class Meta:
        # peewee 3 names this option ``table_name``; the former ``db_table``
        # spelling is deprecated and triggers a deprecation warning.
        table_name = 'articlerelated'

    article = ForeignKeyField(OLDArticleDownload, backref='related')
    related_file_name = TextField(default='')
|
|
|
|
|
|
|
|
|
|
## NEW Models
|
|
class NEWModel(Model):
    """Common base for the new models; binds them to ``download_db``."""

    class Meta:
        database = download_db
|
|
|
|
|
|
class ArticleDownload(NEWModel):
    """Article record in the new database.

    Authors and related files are stored in separate tables and attached
    through ``set_authors`` and ``set_related``.
    """

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')
    file_name = TextField(default='')
    archive_url = TextField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)

    # should be a fixed-length string but float is easier to sort by
    slack_ts = FloatField(default=0)
    sent = BooleanField(default=False)

    # need to know who saved the message because the file needs to be on their
    # computer in order to get verified.
    # os.getenv runs once, when this class body is executed, and yields None
    # if UNAME is not set in the environment.
    archived_by = CharField(default=os.getenv("UNAME"))

    # verification happens in a different app, but the model has the fields
    # here as well
    comment = TextField(default='')
    # 0 = not verified, 1 = verified, -1 = marked as bad
    verified = IntegerField(default=0)

    def set_authors(self, authors):
        """Create one ``ArticleAuthor`` row per name in *authors*.

        Names of 100 characters or more are skipped without any notice.
        """
        for name in authors:
            if len(name) >= 100:
                continue
            ArticleAuthor.create(article=self, author=name)

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per file name in *related*."""
        for related_name in related:
            ArticleRelated.create(article=self, related_file_name=related_name)

    # authors
    # keywords
    # ... are added through foreign keys
    # we will also add an attribute named message, to reference which message
    # should be replied to. This attribute does not need to be saved in the db
|
|
|
|
|
|
|
|
class ArticleAuthor(NEWModel):
    """One author name attached to an ``ArticleDownload``.

    The rows of an article are reachable through its ``authors`` back-reference.
    """

    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
|
|
|
|
|
|
class ArticleRelated(NEWModel):
    """One related file attached to an ``ArticleDownload``.

    Related files are, for example, the full text of a paper or audio files.
    The rows of an article are reachable through its ``related`` back-reference.
    """

    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
####################################################################
|
|
# Migrate using sensible defaults:
|
|
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

# Placeholder publication date for legacy rows whose pub_date is the empty
# string. Computed once instead of on every iteration.
fallback_pub_date = datetime.date.fromtimestamp(0)

# Copy everything inside a single transaction: if any row fails, the target is
# rolled back instead of being left half-migrated (a partial copy would make a
# re-run fail on the unique article_url constraint).
with download_db.atomic():
    for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
        # The same value is logged and written, so resolve the fallback once.
        pub_date = old_art.pub_date if old_art.pub_date != "" else fallback_pub_date

        print("====================================================================")
        print(f"IT {it} New article with data:")
        print(
            old_art.article_url,
            old_art.title,
            old_art.summary,
            old_art.source_name,
            old_art.language,
            old_art.file_name,
            old_art.archive_url,
            pub_date,
            old_art.download_date,
            True,
            old_art.comment,
            old_art.verified,
        )

        # slack_ts and archived_by are not passed, so they take the defaults
        # declared on the model. Every migrated article is marked as sent.
        new_art = ArticleDownload.create(
            article_url=old_art.article_url,
            title=old_art.title,
            summary=old_art.summary,
            source_name=old_art.source_name,
            language=old_art.language,
            file_name=old_art.file_name,
            archive_url=old_art.archive_url,
            pub_date=pub_date,
            download_date=old_art.download_date,
            sent=True,
            comment=old_art.comment,
            verified=old_art.verified,
        )

        # Carry over the rows of the two dependent legacy tables.
        new_art.set_related([r.related_file_name for r in old_art.related])
        new_art.set_authors([a.author for a in old_art.authors])
|
|
|