few bugs in news_fetch left, news_check wip
This commit is contained in:
		| @@ -34,7 +34,7 @@ services: | ||||
|  | ||||
|  | ||||
|   geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers) | ||||
|     image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated) | ||||
|     image: ${GECKODRIVER_IMG} | ||||
|     environment: | ||||
|       - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash) | ||||
|       - START_XVFB=${HEADFULL-false} | ||||
|   | ||||
							
								
								
									
										10
									
								
								launch
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								launch
									
									
									
									
									
								
							| @@ -5,10 +5,12 @@ set -o ignoreeof | ||||
| echo "Bash script launching COSS_ARCHIVING..." | ||||
|  | ||||
|  | ||||
| # CHANGE ME! | ||||
| # CHANGE ME ONCE! | ||||
| export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving | ||||
| export UNAME=remy | ||||
|  | ||||
| # CHANGE ME WHEN UPDATING FIREFOX | ||||
| export GECKODRIVER_IMG=selenium/standalone-firefox:103.0 | ||||
| # version must be >= the one on the host, or firefox will not start (because of mismatched config) | ||||
|  | ||||
| if [[ $1 == "debug" ]] | ||||
| then | ||||
| @@ -16,8 +18,8 @@ then | ||||
|     export HEADFULL=true | ||||
|     export CODE=./ | ||||
|     export ENTRYPOINT=/bin/bash | ||||
|     # since service ports is not enough here, also execute up, which will | ||||
|     docker compose up -d | ||||
|     # since service ports does not open ports on implicitly started containers, also start geckodriver: | ||||
|     docker compose up -d geckodriver | ||||
| elif [[ $1 == "production" ]] | ||||
| then | ||||
|     export DEBUG=false | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import sys | ||||
| from webbrowser import get | ||||
| sys.path.append("../app") | ||||
| import runner | ||||
| import logging | ||||
|   | ||||
							
								
								
									
										170
									
								
								misc/migration.to_postgres.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								misc/migration.to_postgres.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | ||||
| import datetime | ||||
| import sys | ||||
| sys.path.append("../news_fetch/") | ||||
| import configuration # lives in app | ||||
| from peewee import * | ||||
|  | ||||
| import os | ||||
| import time | ||||
|  | ||||
| old_db = SqliteDatabase("/app/containerdata/downloads.db") | ||||
|  | ||||
| cred = configuration.db_config["DATABASE"] | ||||
| download_db = PostgresqlDatabase( | ||||
|     cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432 | ||||
| ) | ||||
|  | ||||
| ## OLD Models | ||||
| class OLDModel(Model): | ||||
|     class Meta: | ||||
|         database = old_db | ||||
|  | ||||
|  | ||||
| class OLDArticleDownload(OLDModel): | ||||
|     class Meta: | ||||
|         db_table = 'articledownload' | ||||
|  | ||||
|     title = CharField(default='') | ||||
|     pub_date = DateField(default = '') | ||||
|     download_date = DateField(default = 0) | ||||
|     source_name = CharField(default = '') | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|     archive_url = TextField(default = '') | ||||
|     file_name = TextField(default = '') | ||||
|     language = CharField(default = '') | ||||
|     summary = TextField(default = '') | ||||
|     comment = TextField(default = '') | ||||
|     verified = IntegerField(default = False) | ||||
|     # authors | ||||
|     # keywords | ||||
|     # ... are added through foreignkeys | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| class OLDArticleAuthor(OLDModel): | ||||
|     class Meta: | ||||
|         db_table = 'articleauthor' | ||||
|  | ||||
|     article = ForeignKeyField(OLDArticleDownload, backref='authors') | ||||
|     author = CharField() | ||||
|  | ||||
|  | ||||
|  | ||||
| class OLDArticleRelated(OLDModel): | ||||
|     class Meta: | ||||
|         db_table = 'articlerelated' | ||||
|  | ||||
|     article = ForeignKeyField(OLDArticleDownload, backref='related') | ||||
|     related_file_name = TextField(default = '') | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| ## NEW Models | ||||
| class NEWModel(Model): | ||||
|     class Meta: | ||||
|         database = download_db | ||||
|  | ||||
|  | ||||
| class ArticleDownload(NEWModel): | ||||
|     # in the beginning this is all we have | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|     # fetch then fills in the metadata | ||||
|     title = TextField(default='') | ||||
|     summary = TextField(default = '') | ||||
|     source_name = CharField(default = '') | ||||
|     language = CharField(default = '') | ||||
|     file_name = TextField(default = '') | ||||
|     archive_url = TextField(default = '') | ||||
|     pub_date = DateField(default = '') | ||||
|     download_date = DateField(default = 0) | ||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||
|     sent = BooleanField(default = False) | ||||
|     archived_by = CharField(default = os.getenv("UNAME")) | ||||
|     # need to know who saved the message because the file needs to be on their computer in order to get verified | ||||
|     # verification happens in a different app, but the model has the fields here as well | ||||
|     comment = TextField(default = '') | ||||
|     verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad | ||||
|      | ||||
|     def set_authors(self, authors): | ||||
|         for a in authors: | ||||
|             if len(a) < 100: | ||||
|                 ArticleAuthor.create( | ||||
|                     article = self, | ||||
|                     author = a | ||||
|             ) | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
|             ) | ||||
|  | ||||
|     # authors | ||||
|     # keywords | ||||
|     # ... are added through foreignkeys | ||||
|     # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db | ||||
|  | ||||
|     | ||||
|  | ||||
| class ArticleAuthor(NEWModel): | ||||
|     article = ForeignKeyField(ArticleDownload, backref='authors') | ||||
|     author = CharField() | ||||
|  | ||||
|  | ||||
| class ArticleRelated(NEWModel): | ||||
|     # Related files, such as the full text of a paper, audio files, etc. | ||||
|     article = ForeignKeyField(ArticleDownload, backref='related') | ||||
|     related_file_name = TextField(default = '') | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| #################################################################### | ||||
| # Migrate using sensible defaults: | ||||
| download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||
|  | ||||
| it = 0 | ||||
| for old_art in OLDArticleDownload.select(): | ||||
|     print("====================================================================") | ||||
|     it+=1 | ||||
|     print(f"IT {it} New article with data:") | ||||
|     print( | ||||
|         old_art.article_url, | ||||
|         old_art.title, | ||||
|         old_art.summary, | ||||
|         old_art.source_name, | ||||
|         old_art.language, | ||||
|         old_art.file_name, | ||||
|         old_art.archive_url, | ||||
|         old_art.pub_date if old_art.pub_date != "" else datetime.date.fromtimestamp(0), | ||||
|         old_art.download_date, | ||||
|         True, | ||||
|         old_art.comment, | ||||
|         old_art.verified | ||||
|     ) | ||||
|     new_art = ArticleDownload.create( | ||||
|         article_url = old_art.article_url, | ||||
|         title = old_art.title, | ||||
|         summary = old_art.summary, | ||||
|         source_name = old_art.source_name, | ||||
|         language = old_art.language, | ||||
|         file_name = old_art.file_name, | ||||
|         archive_url = old_art.archive_url, | ||||
|         pub_date = old_art.pub_date if old_art.pub_date != "" else datetime.date.fromtimestamp(0), | ||||
|         download_date = old_art.download_date, | ||||
|         # slack_ts = FloatField(default = 0) | ||||
|         sent = True, | ||||
|         # archived_by = CharField(default = os.getenv("UNAME")) | ||||
|         comment = old_art.comment, | ||||
|         verified = old_art.verified | ||||
|     ) | ||||
|      | ||||
|      | ||||
|     new_art.set_related([r.related_file_name for r in old_art.related]) | ||||
|     new_art.set_authors([a.author for a in old_art.authors]) | ||||
|      | ||||
| @@ -2,16 +2,38 @@ | ||||
| 	import PDFView from './PDFView.svelte'; | ||||
| 	import ArticleStatus from './ArticleStatus.svelte'; | ||||
| 	import ArticleOperations from './ArticleOperations.svelte'; | ||||
|  | ||||
| 	let current_id = 0;  | ||||
| 	 | ||||
| 	const updateInterface = (async () => { | ||||
| 		let url = ''; | ||||
| 		if (current_id == 0) { | ||||
| 			url = '/api/article/first'; | ||||
| 		} else { | ||||
| 			url = '/api/article/' + current_id + '/next'; | ||||
| 		} | ||||
| 		const response = await fetch(url) | ||||
| 		const data = await response.json() | ||||
| 		current_id = data.id; | ||||
| 		let article_url = '/api/article/' + current_id + '/get'; | ||||
| 		const article_response = await fetch(article_url); | ||||
| 		const article_data = await article_response.json(); | ||||
| 		return article_data; | ||||
| 	})() | ||||
| 	 | ||||
| 	 | ||||
| </script> | ||||
|  | ||||
|  | ||||
| <div class="flex w-full h-full gap-5 p-5"> | ||||
| 	<div class="w-3/5"><PDFView/></div> | ||||
| {#await updateInterface} | ||||
| ... | ||||
| {:then article_data} | ||||
| 	<div class="flex w-full h-screen gap-5 p-5"> | ||||
| 		<div class="w-3/5"><PDFView article_data={article_data}/></div> | ||||
| 		<div class="divider divider-horizontal"></div>  | ||||
| 		<div class="w-2/5"> | ||||
| 		<ArticleStatus article_id={42}/> | ||||
| 			<ArticleStatus article_data={article_data}/> | ||||
| 			<div class="divider divider-vertical"></div>  | ||||
| 		<ArticleOperations/> | ||||
| 			<ArticleOperations article_data={article_data}/> | ||||
| 		</div> | ||||
| 	</div> | ||||
|  | ||||
| {/await} | ||||
|   | ||||
| @@ -1,28 +1,93 @@ | ||||
| <div class="toast"> | ||||
| 	<div class="alert alert-info"> | ||||
| 	    <div> | ||||
|             <span>New message arrived.</span> | ||||
|         </div> | ||||
|     </div> | ||||
| </div> | ||||
| <script> | ||||
|     import {fade} from 'svelte/transition'; | ||||
|      | ||||
| <div class="grid grid-cols-3 gap-4"> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     <div class="highlight">01</div> | ||||
|     export let article_data; | ||||
|      | ||||
| </div> | ||||
| <style> | ||||
|     .highlight { | ||||
|         background-color: #f5f5f5; | ||||
|         border-radius: 5px; | ||||
|         padding: 10px; | ||||
|         margin: 10px; | ||||
|     const actions = [ | ||||
|       {name: 'Mark as good (and skip to next)', kbd: 'A'}, | ||||
|       {name: 'Mark as bad (and skip to next)', kbd: 'B'}, | ||||
|       {name: 'Upload related file', kbd: 'R'}, | ||||
|       {name: 'Skip', kbd: 'ctrl'}, | ||||
|     ] | ||||
|  | ||||
|     const toast_states = { | ||||
|       'success' : {class: 'alert-success', text: 'Article updated successfully'}, | ||||
|       'error' : {class: 'alert-error', text: 'Article update failed'}, | ||||
|     } | ||||
| </style> | ||||
|     let toast_state = {}; | ||||
|     let toast_visible = false; | ||||
|  | ||||
|      | ||||
|     function onKeyDown(e) {apiAction(e.key)} | ||||
|     function apiAction(key) { | ||||
|       if (actions.map(d => d.kbd.toLowerCase()).includes(key.toLowerCase())){ // ignore other keypresses | ||||
|  | ||||
|         const updateArticle = (async() => { | ||||
|           const response = await fetch('/api/article/' + article_data.id + '/set', { | ||||
|             method: 'POST', | ||||
|             headers: {'Content-Type': 'application/json'}, | ||||
|             body: JSON.stringify({ | ||||
|               'action': key.toLowerCase(), | ||||
|             }) | ||||
|           }) | ||||
|           const success = response.status == 200; | ||||
|          | ||||
|           if (success){ | ||||
|             showToast('success'); | ||||
|           } else { | ||||
|             showToast('error'); | ||||
|           } | ||||
|  | ||||
|         })() | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     function showToast(state){   | ||||
|       toast_visible = true; | ||||
|       toast_state = toast_states[state]; | ||||
|       setTimeout(() => { | ||||
|         toast_visible = false; | ||||
|       }, 1000) | ||||
|  | ||||
|     } | ||||
| </script> | ||||
|  | ||||
|  | ||||
| <div class="card bg-neutral-300 shadow-xl"> | ||||
|     <div class="card-body"> | ||||
|       <h2 class="card-title">Your options: (click on action or use keyboard)</h2> | ||||
|       <div class="overflow-x-auto"> | ||||
|         <table class="table w-full table-compact"> | ||||
|           <!-- head --> | ||||
|           <thead> | ||||
|             <tr> | ||||
|                 <th>Action</th> | ||||
|                 <th>Keyboard shortcut</th> | ||||
|             </tr> | ||||
|           </thead> | ||||
|           <tbody> | ||||
|             {#each actions as action} | ||||
|                  | ||||
|                 <tr> | ||||
|                     <td><button on:click={() => apiAction(action.kbd)}>{ action.name }</button></td> | ||||
|                     <td><kbd class="kbd">{ action.kbd }</kbd></td> | ||||
|                 </tr> | ||||
|              | ||||
|             {/each} | ||||
|           </tbody> | ||||
|         </table> | ||||
|       </div> | ||||
|     </div> | ||||
| </div> | ||||
|  | ||||
| <svelte:window on:keydown|preventDefault={onKeyDown} /> | ||||
|  | ||||
| {#if toast_visible} | ||||
| <div class="toast" transition:fade> | ||||
|   <div class="alert { toast_state.class }"> | ||||
|       <div> | ||||
|       <span>{ toast_state.text }.</span> | ||||
|       </div> | ||||
|   </div> | ||||
| </div> | ||||
| {/if} | ||||
| @@ -1,25 +1,38 @@ | ||||
| <script> | ||||
|     export let article_id; | ||||
|     const Article = (async () => { | ||||
|     const response = await fetch('/api/article/' + article_id + '/get') | ||||
|     return await response.json() | ||||
| 	})() | ||||
|     console.log(Article) | ||||
|     export let article_data; | ||||
|     const status_items = [ | ||||
|         {name: 'Title', value: article_data.title}, | ||||
|         {name: 'Filename', value: article_data.file_name}, | ||||
|         {name: 'Language', value: article_data.language}, | ||||
|         {name: 'Authors', value: article_data.authors}, | ||||
|         {name: "Related", value: article_data.related}, | ||||
|     ] | ||||
| </script> | ||||
|  | ||||
| <div class="mockup-window border bg-base-300"> | ||||
|     <h1 class="center">Article overview</h1> | ||||
|     <ul tabindex="0" class="menu p-2 shadow bg-base-100 rounded-box w-52"> | ||||
|         {#await Article} | ||||
|             <li>...waiting</li> | ||||
|         {:then data} | ||||
|             <li><a href="#">{data.value}</a></li> | ||||
|             <li><a href="#">Item 2</a></li> | ||||
|         {:catch error} | ||||
|             <li>An error occurred!</li> | ||||
|         {/await} | ||||
|      | ||||
|          | ||||
|     </ul> | ||||
| <div class="card bg-neutral-300 shadow-xl overflow-x-auto"> | ||||
|     <div class="card-body"> | ||||
|       <h2 class="card-title">Article overview:</h2> | ||||
|         <table class="table w-full table-compact" style="table-layout: fixed"> | ||||
|           <thead> | ||||
|             <tr> | ||||
|               <th>Attribute</th> | ||||
|               <th>Value</th> | ||||
|             </tr> | ||||
|           </thead> | ||||
|           <tbody> | ||||
|             {#each status_items as item} | ||||
|                 <tr> | ||||
|                     <td>{ item.name }</td> | ||||
|                     <!-- <td>Quality Control Specialist</td> --> | ||||
|                     {#if item.value != ""} | ||||
|                         <td class='bg-emerald-200' style="white-space: normal">{ item.value }</td> | ||||
|                     {:else} | ||||
|                         <td class='bg-red-200'>{ item.value }</td> | ||||
|                     {/if} | ||||
|                 </tr> | ||||
|             {/each} | ||||
|           </tbody> | ||||
|         </table> | ||||
|       </div> | ||||
|      | ||||
| </div> | ||||
| @@ -1,64 +1,10 @@ | ||||
| <!--  | ||||
| <script> | ||||
| 	var myState = { | ||||
| 		pdf: null, | ||||
| 		currentPage: 1, | ||||
| 		zoom: 1 | ||||
| 	} | ||||
|    | ||||
| 	pdfjsLib.getDocument('test.pdf').then((pdf) => { | ||||
|    | ||||
| 		myState.pdf = pdf; | ||||
| 		render(); | ||||
|  | ||||
| 	}); | ||||
|  | ||||
| 	function render() { | ||||
| 		myState.pdf.getPage(myState.currentPage).then((page) => { | ||||
| 	   | ||||
| 			var canvas = document.getElementById("pdf_renderer"); | ||||
| 			var ctx = canvas.getContext('2d'); | ||||
|    | ||||
| 			var viewport = page.getViewport(myState.zoom); | ||||
|  | ||||
| 			canvas.width = viewport.width; | ||||
| 			canvas.height = viewport.height; | ||||
| 	   | ||||
| 			page.render({ | ||||
| 				canvasContext: ctx, | ||||
| 				viewport: viewport | ||||
| 			}); | ||||
| 		}); | ||||
| 	} | ||||
|  | ||||
| </script> | ||||
|   --> | ||||
| <!-- <div id="my_pdf_viewer"> | ||||
|     <div class="mockup-window border bg-base-300"> | ||||
|         <div id="canvas_container" class="flex justify-center"> | ||||
|             <canvas id="pdf_renderer"></canvas> | ||||
|         </div> | ||||
|     </div> | ||||
|          | ||||
|  | ||||
|     <div id="navigation_controls"> | ||||
|         <button id="go_previous">Previous</button> | ||||
|         <input id="current_page" value="1" type="number"/> | ||||
|         <button id="go_next">Next</button> | ||||
|     </div> | ||||
|  | ||||
|     <div id="zoom_controls">   | ||||
|         <button id="zoom_in">+</button> | ||||
|         <button id="zoom_out">-</button> | ||||
|     </div> | ||||
| </div> --> | ||||
|  | ||||
| <script> | ||||
| 	let pdf_file = 'test.pdf'; | ||||
| 	export let article_data; | ||||
| </script>  | ||||
|  | ||||
| <div class="mockup-window border bg-base-300 h-full w-full"> | ||||
| 	<object class="pdf-view" data="{pdf_file}" title="Article PDF"> </object> | ||||
| <div class="h-full w-full shadow-xl"> | ||||
| 	<object class="pdf-view" data="{article_data.save_path + article_data.file_name}" title="Article PDF"> </object> | ||||
| </div> | ||||
|  | ||||
| <style> | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| import json | ||||
| from flask import Flask, send_from_directory, jsonify | ||||
| import random | ||||
|  | ||||
| from flask import Flask, send_from_directory, request | ||||
| import configuration | ||||
| models = configuration.models | ||||
| db = configuration.db | ||||
| app = Flask(__name__) | ||||
|  | ||||
|  | ||||
| @@ -9,26 +9,55 @@ app = Flask(__name__) | ||||
| # SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm) | ||||
|  | ||||
| @app.route("/") #index.html | ||||
| def base(): | ||||
| def index(): | ||||
|     return send_from_directory('../client/public', 'index.html') | ||||
| @app.route("/<path:path>") #js-files | ||||
| def home(path): | ||||
| def js(path): | ||||
|     return send_from_directory('../client/public', path) | ||||
|  | ||||
| @app.route("/app/containerdata/files/<path:path>") | ||||
| def static_pdf(path): | ||||
|     return send_from_directory('/app/containerdata/files/', path) | ||||
|  | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # API for news_check. | ||||
| # (simple) API for news_check. | ||||
|  | ||||
| @app.route("/api/article/<int:id>/get") | ||||
| def get_article(id): | ||||
|     res = {"value": id} | ||||
|     return jsonify(res) | ||||
| def get_article_by_id(id): | ||||
|     with db: | ||||
|         article = models.ArticleDownload.get_by_id(id) | ||||
|         return article.to_dict() | ||||
|  | ||||
| @app.route("/api/article/first") | ||||
| def get_article_first(): | ||||
|     with db: | ||||
|         article = models.ArticleDownload.select(models.ArticleDownload.id).where(models.ArticleDownload.verified == 0).order_by(models.ArticleDownload.id).first() | ||||
|         return {"id" : article.id} | ||||
|  | ||||
| @app.route("/api/article/<int:id>/next") | ||||
| def get_article_next(id): | ||||
|     with db: | ||||
|         if models.ArticleDownload.get_by_id(id + 1).verified == 0: | ||||
|             return {"id" : id + 1} | ||||
|         else: | ||||
|             return get_article_first() | ||||
|  | ||||
|  | ||||
|  | ||||
| @app.route("/api/article/<int:id>/set", methods=['POST']) | ||||
| def set_article(id): | ||||
|     return str(random.randint(0, 100)) | ||||
|     action = request.json['action'] | ||||
|     with db: | ||||
|         article = models.ArticleDownload.get_by_id(id) | ||||
|         if action == "a": | ||||
|             article.verified = 1 | ||||
|         elif action == "b": | ||||
|             article.verified = -1 | ||||
|         elif action == "r": | ||||
|             article.set_related() | ||||
|         article.save() | ||||
|         return "ok" | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										16
									
								
								news_check/server/configuration.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								news_check/server/configuration.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| from peewee import PostgresqlDatabase | ||||
| import configparser | ||||
|  | ||||
| main_config = configparser.ConfigParser() | ||||
| main_config.read("/app/containerdata/config/news_fetch.config.ini") | ||||
|  | ||||
| db_config = configparser.ConfigParser() | ||||
| db_config.read("/app/containerdata/config/db.config.ini") | ||||
|  | ||||
| cred = db_config["DATABASE"] | ||||
| db = PostgresqlDatabase( | ||||
|     cred["db_name"], user=cred["user_name"], password=cred["password"], host="vpn", port=5432 | ||||
| ) | ||||
|  | ||||
| import models | ||||
| models.set_db(db) | ||||
							
								
								
									
										134
									
								
								news_check/server/models.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								news_check/server/models.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | ||||
| import logging | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| from peewee import * | ||||
| import os | ||||
| import datetime | ||||
| import configuration | ||||
|  | ||||
| config = configuration.main_config["DOWNLOADS"] | ||||
|  | ||||
| # set the nature of the db at runtime | ||||
| download_db = DatabaseProxy() | ||||
|  | ||||
|  | ||||
| class DownloadBaseModel(Model): | ||||
|     class Meta: | ||||
|         database = download_db | ||||
|  | ||||
|  | ||||
|  | ||||
| ## == Article related models == ## | ||||
| class ArticleDownload(DownloadBaseModel): | ||||
|     # in the beginning this is all we have | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|      | ||||
|     # fetch then fills in the metadata | ||||
|     title = TextField(default='') | ||||
|  | ||||
|     summary = TextField(default = '') | ||||
|     source_name = CharField(default = '') | ||||
|     language = CharField(default = '') | ||||
|  | ||||
|  | ||||
|     file_name = TextField(default = '') | ||||
|     @property | ||||
|     def save_path(self): | ||||
|         return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/" | ||||
|     @property | ||||
|     def fname_nas(self, file_name=""): | ||||
|         if self.download_date: | ||||
|             if file_name: | ||||
|                 return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{file_name}" | ||||
|             else: # return the self. name | ||||
|                 return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{self.file_name}" | ||||
|         else: | ||||
|             return None | ||||
|  | ||||
|      | ||||
|     archive_url = TextField(default = '') | ||||
|     pub_date = DateField(default = datetime.date.fromtimestamp(0)) | ||||
|     download_date = DateField(default = datetime.date.today) | ||||
|  | ||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||
|  | ||||
|     sent = BooleanField(default = False) | ||||
|      | ||||
|     archived_by = CharField(default = os.getenv("UNAME")) | ||||
|     # need to know who saved the message because the file needs to be on their computer in order to get verified | ||||
|     # verification happens in a different app, but the model has the fields here as well | ||||
|     comment = TextField(default = '') | ||||
|     verified = IntegerField(default = 0) # 0 = not verified, 1 = verified, -1 = marked as bad | ||||
|  | ||||
|     # authors | ||||
|     # keywords | ||||
|     # ... are added through foreignkeys | ||||
|     # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db | ||||
|  | ||||
|     def to_dict(self): | ||||
|         return { | ||||
|             "id": self.id, | ||||
|             "article_url": self.article_url, | ||||
|             "title": self.title, | ||||
|             "summary": self.summary, | ||||
|             "source_name": self.source_name, | ||||
|             "language": self.language, | ||||
|             "file_name": self.file_name, | ||||
|             "save_path": self.save_path, | ||||
|             "fname_nas": self.fname_nas, | ||||
|             "archive_url": self.archive_url, | ||||
|             "pub_date": self.pub_date.strftime("%Y-%m-%d"), | ||||
|             "download_date": self.download_date.strftime("%Y-%m-%d"), | ||||
|             "sent": self.sent, | ||||
|             "comment": self.comment, | ||||
|             "related": [r.related_file_name for r in self.related], | ||||
|             "authors": [a.author for a in self.authors] | ||||
|         } | ||||
|  | ||||
|  | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             if len(r) > 255: | ||||
|                 raise Exception("Related file name too long for POSTGRES") | ||||
|  | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
|             ) | ||||
|  | ||||
|     def file_status(self): | ||||
|         if not self.file_name: | ||||
|             logger.error(f"Article {self} has no filename!") | ||||
|             return False, {"reply_text": "Download failed, no file was saved.", "file_path": None} | ||||
|          | ||||
|         file_path_abs = self.save_path + self.file_name | ||||
|         if not os.path.exists(file_path_abs): | ||||
|             logger.error(f"Article {self} has a filename, but the file does not exist at that location!") | ||||
|             return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None} | ||||
|  | ||||
|         return True, {} | ||||
|  | ||||
|  | ||||
| class ArticleAuthor(DownloadBaseModel): | ||||
|     # One author name attached to an article; reachable from the article through the `authors` backref. | ||||
|     article = ForeignKeyField(ArticleDownload, backref='authors') | ||||
|     author = CharField() | ||||
|  | ||||
|  | ||||
| class ArticleRelated(DownloadBaseModel): | ||||
|     # Related files, such as the full text of a paper, audio files, etc. | ||||
|     # Each row stores one file name; reachable from the article through the `related` backref. | ||||
|     article = ForeignKeyField(ArticleDownload, backref='related') | ||||
|     related_file_name = TextField(default = '') | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_db(download_db_object): | ||||
|     download_db.initialize(download_db_object) | ||||
|     with download_db: # create tables (does nothing if they exist already) | ||||
|         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||
|  | ||||
|  | ||||
| @@ -1,20 +0,0 @@ | ||||
| import peewee | ||||
|  | ||||
| db = peewee.PostgresqlDatabase('coss_archiving', user='ca_rw', password='pleasechangeit', host='vpn', port=5432) | ||||
| # db.connect() | ||||
|  | ||||
|  | ||||
| class Pet(peewee.Model): | ||||
|     name = peewee.CharField() | ||||
|     animal_type = peewee.CharField() | ||||
|  | ||||
|     class Meta: | ||||
|         database = db # this model uses the "people.db" database | ||||
| with db: | ||||
|     db.create_tables([Pet]) | ||||
| db.get_tables() | ||||
|  | ||||
| t = Pet.create(name="Test", animal_type="test") | ||||
|  | ||||
| for pet in Pet.select(): | ||||
|     print(pet.name) | ||||
| @@ -8,3 +8,4 @@ newspaper3k | ||||
| htmldate | ||||
| markdown | ||||
| rich | ||||
| psycopg2 | ||||
| @@ -123,7 +123,6 @@ class Coordinator(Thread): | ||||
|         unsent = models.ArticleDownload.filter(sent = False) | ||||
|         # .objects.filter(sent = False) | ||||
|         for a in unsent: | ||||
|             print(a) | ||||
|             self.incoming_request(article=a) | ||||
|  | ||||
|  | ||||
| @@ -170,7 +169,7 @@ class Coordinator(Thread): | ||||
|  | ||||
|         for article in articles: | ||||
|             notifier = lambda article: logger.info(f"Completed manual actions for {article}") | ||||
|             ArticleWatcher(article, None, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg  | ||||
|             ArticleWatcher(article, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg  | ||||
|  | ||||
|     def article_complete_notifier(self, article): | ||||
|         if self.worker_slack is None: | ||||
| @@ -192,7 +191,7 @@ if __name__ == "__main__": | ||||
|  | ||||
|  | ||||
|     if "upload" in sys.argv: | ||||
|         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute() | ||||
|         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "" or models.ArticleDownload.archive_url == "TODO:UPLOAD").execute() | ||||
|         logger.info(f"Launching upload to archive for {len(articles)} articles.") | ||||
|         coordinator.manual_processing(articles, [UploadWorker()]) | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,6 @@ logger = logging.getLogger(__name__) | ||||
| from peewee import * | ||||
| import os | ||||
| import markdown | ||||
| import re | ||||
| import configuration | ||||
| import datetime | ||||
|  | ||||
| @@ -28,7 +27,7 @@ class ArticleDownload(DownloadBaseModel): | ||||
|     article_url = TextField(default = '', unique=True) | ||||
|      | ||||
|     # fetch then fills in the metadata | ||||
|     title = CharField(default='') | ||||
|     title = TextField(default='') | ||||
|     @property | ||||
|     def is_title_bad(self):  # add incrementally | ||||
|         return "PUR-Abo" in self.title \ | ||||
| @@ -63,7 +62,7 @@ class ArticleDownload(DownloadBaseModel): | ||||
|  | ||||
|      | ||||
|     archive_url = TextField(default = '') | ||||
|     pub_date = DateField(default = '') | ||||
|     pub_date = DateField(default = datetime.date.fromtimestamp(0)) | ||||
|     download_date = DateField(default = datetime.date.today) | ||||
|  | ||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||
| @@ -143,6 +142,7 @@ class ArticleDownload(DownloadBaseModel): | ||||
|  | ||||
|     def set_authors(self, authors): | ||||
|         for a in authors: | ||||
|             if len(a) < 100: # otherwise it's a mismatched string | ||||
|                 ArticleAuthor.create( | ||||
|                     article = self, | ||||
|                     author = a | ||||
| @@ -150,6 +150,9 @@ class ArticleDownload(DownloadBaseModel): | ||||
|  | ||||
|     def set_related(self, related): | ||||
|         for r in related: | ||||
|             if len(r) > 255: | ||||
|                 raise Exception("Related file name too long for POSTGRES") | ||||
|  | ||||
|             ArticleRelated.create( | ||||
|                 article = self, | ||||
|                 related_file_name = r | ||||
| @@ -182,116 +185,7 @@ class ArticleRelated(DownloadBaseModel): | ||||
|  | ||||
|  | ||||
|  | ||||
| # class Thread(ChatBaseModel): | ||||
| #     """The threads that concern us are only created if the base message contains a url""" | ||||
| #     thread_ts = FloatField(default = 0) | ||||
| #     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) | ||||
| #     # provides, ts, user, models | ||||
| #     # messages | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.thread_ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros*"0") | ||||
|  | ||||
| #     @property | ||||
| #     def initiator_message(self): | ||||
| #         try: | ||||
| #             return self.messages[0] # TODO check if this needs sorting | ||||
| #         except IndexError: | ||||
| #             logger.warning(f"Thread {self} is empty. How can that be?") | ||||
| #             return None | ||||
|  | ||||
| #     @property | ||||
| #     def message_count(self): | ||||
| #         # logger.warning("message_count was called") | ||||
| #         return self.messages.count() | ||||
|  | ||||
| #     @property | ||||
| #     def last_message(self): | ||||
| #         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation | ||||
| #         return messages[-1] | ||||
|  | ||||
| #     @property | ||||
| #     def is_fully_processed(self) -> bool: | ||||
| #         init_message = self.initiator_message | ||||
| #         if init_message is None: | ||||
| #             return False | ||||
|          | ||||
| #         if init_message.is_processed_override: | ||||
| #             return True | ||||
| #         # this override is set, for instance, when no url was sent at all; the thread is then ignored | ||||
|          | ||||
| #         reactions = init_message.reaction | ||||
| #         if not reactions: | ||||
| #             return False | ||||
| #         else: | ||||
| #             r = reactions[0].type # can and should only have one reaction | ||||
| #             return r == "white_check_mark" \ | ||||
| #                 or r == "x" | ||||
|  | ||||
|  | ||||
|      | ||||
| # class Message(ChatBaseModel): | ||||
| #     ts = FloatField(unique=True) #for sorting | ||||
| #     channel_id = CharField(default='') | ||||
| #     user = ForeignKeyField(User, backref="messages") | ||||
| #     text = TextField(default='') | ||||
| #     thread = ForeignKeyField(Thread, backref="messages", default=None) | ||||
| #     file_type = CharField(default='') | ||||
| #     perma_link = CharField(default='') | ||||
| #     is_processed_override = BooleanField(default=False) | ||||
| #     # reaction | ||||
|  | ||||
| #     def __str__(self) -> str: | ||||
| #         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) | ||||
|  | ||||
| #     @property | ||||
| #     def slack_ts(self): | ||||
| #         str_ts = str(self.ts) | ||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, problem! | ||||
| #         return "{}{}".format(str_ts, cut_zeros * "0") | ||||
|  | ||||
|  | ||||
| #     @property | ||||
| #     def urls(self): | ||||
| #         pattern = r"<(.*?)>" | ||||
| #         matches = re.findall(pattern, self.text) | ||||
| #         matches = [m for m in matches if "." in m] | ||||
|          | ||||
| #         new_matches = [] | ||||
| #         for m in matches: | ||||
| #             if "." in m:  # must contain a tld, right? | ||||
| #                 # further complication: slack automatically abbreviates urls in the format:  | ||||
| #                 # <url|link preview>. Lucky for us, "|" is a character discouraged in urls, meaning we can "safely" split on it and keep the first half | ||||
| #                 if "|" in m: | ||||
| #                     keep = m.split("|")[0] | ||||
| #                 else: | ||||
| #                     keep = m | ||||
| #                 new_matches.append(keep) | ||||
| #         return new_matches | ||||
|      | ||||
| #     @property | ||||
| #     def is_by_human(self): | ||||
| #         return self.user.user_id != slack_config["bot_id"] | ||||
|  | ||||
|      | ||||
| #     @property | ||||
| #     def has_single_url(self): | ||||
| #         return len(self.urls) == 1 | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_db(download_db_object): | ||||
|     """Initialize `download_db` with the given database object and create the article tables.""" | ||||
|     download_db.initialize(download_db_object) | ||||
|     with download_db: # create tables (does nothing if they exist already) | ||||
|         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user