"""One-off migration of downloaded-article records from SQLite to PostgreSQL.

Reads every row of the old ``articledownload`` table (plus its authors and
related files) from the SQLite file and re-creates it in the PostgreSQL
database described by ``configuration.db_config["DATABASE"]``.
"""
import datetime
import sys
sys.path.append("../news_fetch/")  # must run before importing configuration
import configuration  # lives in app
from peewee import *

import os
import time

old_db = SqliteDatabase("/app/containerdata/downloads.db")

cred = configuration.db_config["DATABASE"]
download_db = PostgresqlDatabase(
    cred["db_name"],
    user=cred["user_name"],
    password=cred["password"],
    host="vpn",
    port=5432,
)


## OLD Models
class OLDModel(Model):
    """Base class binding the legacy models to the SQLite database."""

    class Meta:
        database = old_db


class OLDArticleDownload(OLDModel):
    """Legacy article row, read from the existing ``articledownload`` table."""

    class Meta:
        db_table = 'articledownload'

    title = CharField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)
    source_name = CharField(default='')
    article_url = TextField(default='', unique=True)
    archive_url = TextField(default='')
    file_name = TextField(default='')
    language = CharField(default='')
    summary = TextField(default='')
    comment = TextField(default='')
    verified = IntegerField(default=False)
    # authors
    # keywords
    # ... are added through foreign keys


class OLDArticleAuthor(OLDModel):
    """Legacy author row; reachable as ``OLDArticleDownload.authors``."""

    class Meta:
        db_table = 'articleauthor'

    article = ForeignKeyField(OLDArticleDownload, backref='authors')
    author = CharField()


class OLDArticleRelated(OLDModel):
    """Legacy related-file row; reachable as ``OLDArticleDownload.related``."""

    class Meta:
        db_table = 'articlerelated'

    article = ForeignKeyField(OLDArticleDownload, backref='related')
    related_file_name = TextField(default='')


## NEW Models
class NEWModel(Model):
    """Base class binding the new models to the PostgreSQL database."""

    class Meta:
        database = download_db


class ArticleDownload(NEWModel):
    """Article row in the new schema."""

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)
    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')
    file_name = TextField(default='')
    archive_url = TextField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)
    # should be a fixed-length string but float is easier to sort by
    slack_ts = FloatField(default=0)
    sent = BooleanField(default=False)
    # need to know who saved the message because the file needs to be on
    # their computer in order to get verified.
    # NOTE: os.getenv is evaluated once, when this class is defined, and
    # returns None if UNAME is not set.
    archived_by = CharField(default=os.getenv("UNAME"))
    # verification happens in a different app, but the model has the fields
    # here as well
    comment = TextField(default='')
    # 0 = not verified, 1 = verified, -1 = marked as bad
    verified = IntegerField(default=0)

    def set_authors(self, authors):
        """Create one ArticleAuthor row per name in *authors*.

        Names of 100 characters or more are skipped.
        """
        for name in authors:
            if len(name) < 100:
                ArticleAuthor.create(article=self, author=name)

    def set_related(self, related):
        """Create one ArticleRelated row per file name in *related*."""
        for related_name in related:
            ArticleRelated.create(article=self, related_file_name=related_name)

    # authors
    # keywords
    # ... are added through foreign keys
    # we will also add an attribute named message, to reference which message
    # should be replied to. This attribute does not need to be saved in the db


class ArticleAuthor(NEWModel):
    """Author of an article; reachable as ``ArticleDownload.authors``."""

    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()


class ArticleRelated(NEWModel):
    """Related file of an article; reachable as ``ArticleDownload.related``."""

    # Related files, such as the full text of a paper, audio files, etc.
    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')


####################################################################
# Migrate using sensible defaults:
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

# Run the whole copy inside one transaction: if any row fails, everything is
# rolled back instead of leaving a partial migration behind, which would make a
# rerun collide with the unique constraint on article_url.
with download_db.atomic():
    for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
        print("====================================================================")
        print(f"IT {it} New article with data:")

        # The old table may hold an empty string instead of a date; substitute
        # the epoch date in that case.
        pub_date = old_art.pub_date
        if pub_date == "":
            pub_date = datetime.date.fromtimestamp(0)

        # Single source of truth for what is logged and what is inserted.
        # Insertion order is the order in which the values are printed.
        # slack_ts and archived_by are deliberately omitted so they fall back
        # to the model defaults.
        new_fields = {
            "article_url": old_art.article_url,
            "title": old_art.title,
            "summary": old_art.summary,
            "source_name": old_art.source_name,
            "language": old_art.language,
            "file_name": old_art.file_name,
            "archive_url": old_art.archive_url,
            "pub_date": pub_date,
            "download_date": old_art.download_date,
            "sent": True,  # every migrated article is marked as sent
            "comment": old_art.comment,
            "verified": old_art.verified,
        }
        print(*new_fields.values())

        new_art = ArticleDownload.create(**new_fields)
        new_art.set_related([r.related_file_name for r in old_art.related])
        new_art.set_authors([a.author for a in old_art.authors])