A few bugs left in news_fetch; news_check is WIP
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import sys
|
||||
from webbrowser import get
|
||||
sys.path.append("../app")
|
||||
import runner
|
||||
import logging
|
||||
|
170
misc/migration.to_postgres.py
Normal file
170
misc/migration.to_postgres.py
Normal file
@@ -0,0 +1,170 @@
|
||||
import datetime
|
||||
import sys
|
||||
sys.path.append("../news_fetch/")
|
||||
import configuration # lives in app
|
||||
from peewee import *
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
# Source of the migration: the legacy SQLite file.
old_db = SqliteDatabase("/app/containerdata/downloads.db")

# Target of the migration: the Postgres instance. Database name, user and
# password come from the "DATABASE" section of the shared configuration;
# host and port are fixed in this script.
cred = configuration.db_config["DATABASE"]
download_db = PostgresqlDatabase(
    cred["db_name"],
    user=cred["user_name"],
    password=cred["password"],
    host="vpn",
    port=5432,
)
|
||||
|
||||
## OLD Models
|
||||
class OLDModel(Model):
    """Common base for the legacy models; binds them to the SQLite source."""

    class Meta:
        # Every OLD* model below inherits this database binding.
        database = old_db
|
||||
|
||||
|
||||
class OLDArticleDownload(OLDModel):
    """Legacy article row, mapped onto the SQLite ``articledownload`` table.

    Authors and related files are not columns here; they are reachable
    through the ``authors`` and ``related`` backrefs declared on
    ``OLDArticleAuthor`` and ``OLDArticleRelated``.
    """

    class Meta:
        # Pin the legacy table name explicitly instead of letting peewee
        # derive one from the class name. ``table_name`` is the peewee 3
        # spelling; ``db_table`` is deprecated.
        table_name = 'articledownload'

    title = CharField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)
    source_name = CharField(default='')
    article_url = TextField(default='', unique=True)
    archive_url = TextField(default='')
    file_name = TextField(default='')
    language = CharField(default='')
    summary = TextField(default='')
    comment = TextField(default='')
    verified = IntegerField(default=False)
    # authors
    # keywords
    # ... are added through foreign keys
|
||||
|
||||
|
||||
|
||||
|
||||
class OLDArticleAuthor(OLDModel):
    """Legacy author row (one per author) from the ``articleauthor`` table."""

    class Meta:
        # ``table_name`` replaces the deprecated ``db_table`` option and keeps
        # the model pointed at the existing legacy table.
        table_name = 'articleauthor'

    # Exposes the reverse accessor ``OLDArticleDownload.authors``.
    article = ForeignKeyField(OLDArticleDownload, backref='authors')
    author = CharField()
|
||||
|
||||
|
||||
|
||||
class OLDArticleRelated(OLDModel):
    """Legacy related-file row from the ``articlerelated`` table."""

    class Meta:
        # ``table_name`` replaces the deprecated ``db_table`` option and keeps
        # the model pointed at the existing legacy table.
        table_name = 'articlerelated'

    # Exposes the reverse accessor ``OLDArticleDownload.related``.
    article = ForeignKeyField(OLDArticleDownload, backref='related')
    related_file_name = TextField(default='')
|
||||
|
||||
|
||||
|
||||
|
||||
## NEW Models
|
||||
class NEWModel(Model):
    """Common base for the new models; binds them to the Postgres target."""

    class Meta:
        # Every model deriving from NEWModel is stored in download_db.
        database = download_db
|
||||
|
||||
|
||||
class ArticleDownload(NEWModel):
    """Article record in the new Postgres schema.

    Authors and related files are stored in ``ArticleAuthor`` and
    ``ArticleRelated`` and attached with ``set_authors`` / ``set_related``.
    A ``message`` attribute (which message should be replied to) is also set
    on instances elsewhere; it does not need to be saved in the db.
    """

    # Initially the URL is the only thing known about an article.
    article_url = TextField(default='', unique=True)

    # Metadata filled in by the fetch step.
    title = TextField(default='')
    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')
    file_name = TextField(default='')
    archive_url = TextField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)

    # Should be a fixed-length string, but a float is easier to sort by.
    slack_ts = FloatField(default=0)
    sent = BooleanField(default=False)

    # Who saved the message: the file needs to be on their computer in order
    # to get verified. The default is read from the UNAME environment
    # variable once, when this class is defined.
    archived_by = CharField(default=os.getenv("UNAME"))

    # Verification happens in a different app, but the model carries the
    # fields here as well.
    comment = TextField(default='')
    # 0 = not verified, 1 = verified, -1 = marked as bad
    verified = IntegerField(default=0)

    def set_authors(self, authors):
        """Create one ArticleAuthor row per author name shorter than 100 chars.

        Names of 100 characters or more are skipped without any notice.
        """
        short_names = (name for name in authors if len(name) < 100)
        for name in short_names:
            ArticleAuthor.create(article=self, author=name)

    def set_related(self, related):
        """Create one ArticleRelated row per file name in ``related``."""
        for file_name in related:
            ArticleRelated.create(article=self, related_file_name=file_name)

    # authors
    # keywords
    # ... are added through foreign keys
|
||||
|
||||
|
||||
|
||||
class ArticleAuthor(NEWModel):
    """A single author name attached to an ArticleDownload."""

    # Exposes the reverse accessor ``ArticleDownload.authors``.
    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
|
||||
|
||||
|
||||
class ArticleRelated(NEWModel):
    """A related file (full text of a paper, audio file, etc.) of an article."""

    # Exposes the reverse accessor ``ArticleDownload.related``.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
####################################################################
|
||||
# Migrate using sensible defaults:
|
||||
# Create the target tables before copying any rows.
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

# Placeholder publication date for legacy rows whose pub_date is the empty
# string. Computed once instead of once per article.
fallback_pub_date = datetime.date.fromtimestamp(0)

# Copy everything inside a single transaction: if any row fails, nothing is
# committed, so the script can simply be re-run. Without this, a failure
# midway would leave a partially filled database and the next run would hit
# the unique constraint on article_url.
with download_db.atomic():
    for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
        print("====================================================================")
        print(f"IT {it} New article with data:")

        # Single definition of the migrated values, used for both the log
        # line and the insert. slack_ts and archived_by are deliberately not
        # supplied and therefore take the model defaults.
        fields = {
            "article_url": old_art.article_url,
            "title": old_art.title,
            "summary": old_art.summary,
            "source_name": old_art.source_name,
            "language": old_art.language,
            "file_name": old_art.file_name,
            "archive_url": old_art.archive_url,
            "pub_date": old_art.pub_date if old_art.pub_date != "" else fallback_pub_date,
            "download_date": old_art.download_date,
            # Every migrated article is marked as already sent.
            "sent": True,
            "comment": old_art.comment,
            "verified": old_art.verified,
        }
        print(*fields.values())

        new_art = ArticleDownload.create(**fields)

        # Carry over the dependent rows reachable through the legacy backrefs.
        new_art.set_related([r.related_file_name for r in old_art.related])
        new_art.set_authors([a.author for a in old_art.authors])
|
||||
|
Reference in New Issue
Block a user