A few bugs remain in news_fetch; news_check is WIP
This commit is contained in:
		| @@ -34,7 +34,7 @@ services: | |||||||
|  |  | ||||||
|  |  | ||||||
|   geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers) |   geckodriver: # separate docker container for pdf-download. This hugely improves stability (and creates shorter build times for the containers) | ||||||
|     image: selenium/standalone-firefox:103.0 # latest version because it mirrors the locally installed version (which is automatically updated) |     image: ${GECKODRIVER_IMG} | ||||||
|     environment: |     environment: | ||||||
|       - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash) |       - START_VNC=${HEADFULL-false} # as opposed to headless, used when requiring supervision (eg. for websites that crash) | ||||||
|       - START_XVFB=${HEADFULL-false} |       - START_XVFB=${HEADFULL-false} | ||||||
|   | |||||||
							
								
								
									
										10
									
								
								launch
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								launch
									
									
									
									
									
								
							| @@ -5,10 +5,12 @@ set -o ignoreeof | |||||||
| echo "Bash script launching COSS_ARCHIVING..." | echo "Bash script launching COSS_ARCHIVING..." | ||||||
|  |  | ||||||
|  |  | ||||||
| # CHANGE ME! | # CHANGE ME ONCE! | ||||||
| export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving | export CONTAINER_DATA=~/Bulk/COSS/Downloads/coss_archiving | ||||||
| export UNAME=remy | export UNAME=remy | ||||||
|  | # CHANGE ME WHEN UPDATING FIREFOX | ||||||
|  | export GECKODRIVER_IMG=selenium/standalone-firefox:103.0 | ||||||
|  | # version must be >= the one on the host, otherwise firefox will not start (because of mismatched config) | ||||||
|  |  | ||||||
| if [[ $1 == "debug" ]] | if [[ $1 == "debug" ]] | ||||||
| then | then | ||||||
| @@ -16,8 +18,8 @@ then | |||||||
|     export HEADFULL=true |     export HEADFULL=true | ||||||
|     export CODE=./ |     export CODE=./ | ||||||
|     export ENTRYPOINT=/bin/bash |     export ENTRYPOINT=/bin/bash | ||||||
|     # since service ports is not enough here, also execute up, which will |     # since service ports does not open ports on implicitly started containers, also start geckodriver: | ||||||
|     docker compose up -d |     docker compose up -d geckodriver | ||||||
| elif [[ $1 == "production" ]] | elif [[ $1 == "production" ]] | ||||||
| then | then | ||||||
|     export DEBUG=false |     export DEBUG=false | ||||||
|   | |||||||
| @@ -1,5 +1,4 @@ | |||||||
| import sys | import sys | ||||||
| from webbrowser import get |  | ||||||
| sys.path.append("../app") | sys.path.append("../app") | ||||||
| import runner | import runner | ||||||
| import logging | import logging | ||||||
|   | |||||||
							
								
								
									
										170
									
								
								misc/migration.to_postgres.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								misc/migration.to_postgres.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | |||||||
|  | import datetime | ||||||
|  | import sys | ||||||
|  | sys.path.append("../news_fetch/") | ||||||
|  | import configuration # lives in app | ||||||
|  | from peewee import * | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import time | ||||||
|  |  | ||||||
# Source of the migration: the legacy SQLite file.
old_db = SqliteDatabase("/app/containerdata/downloads.db")

# Target of the migration: Postgres, with credentials taken from the
# [DATABASE] section of the db config loaded by `configuration`.
cred = configuration.db_config["DATABASE"]
download_db = PostgresqlDatabase(
    cred["db_name"],
    user=cred["user_name"],
    password=cred["password"],
    host="vpn",
    port=5432,
)
|  |  | ||||||
|  | ## OLD Models | ||||||
class OLDModel(Model):
    """Base class that binds every legacy model to the SQLite source database."""

    class Meta:
        database = old_db
|  |  | ||||||
|  |  | ||||||
class OLDArticleDownload(OLDModel):
    """Legacy article row, read from the ``articledownload`` table.

    Only used as the read side of the migration; the field defaults mirror
    the schema the legacy table was created with.
    """

    class Meta:
        db_table = 'articledownload'

    title = CharField(default='')
    pub_date = DateField(default='')
    download_date = DateField(default=0)
    source_name = CharField(default='')
    article_url = TextField(default='', unique=True)
    archive_url = TextField(default='')
    file_name = TextField(default='')
    language = CharField(default='')
    summary = TextField(default='')
    comment = TextField(default='')
    verified = IntegerField(default=False)
    # authors, keywords, ... are attached through foreign keys
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
class OLDArticleAuthor(OLDModel):
    """Legacy author row; reachable from an article via ``article.authors``."""

    class Meta:
        db_table = 'articleauthor'

    article = ForeignKeyField(OLDArticleDownload, backref='authors')
    author = CharField()
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
class OLDArticleRelated(OLDModel):
    """Legacy related-file row; reachable from an article via ``article.related``."""

    class Meta:
        db_table = 'articlerelated'

    article = ForeignKeyField(OLDArticleDownload, backref='related')
    related_file_name = TextField(default='')
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## NEW Models | ||||||
class NEWModel(Model):
    """Base class that binds every migrated model to the Postgres target database."""

    class Meta:
        database = download_db
|  |  | ||||||
|  |  | ||||||
class ArticleDownload(NEWModel):
    """Target-side article model that the legacy rows are copied into.

    Authors and related files are stored in ``ArticleAuthor`` /
    ``ArticleRelated`` and attached through ``set_authors`` / ``set_related``.
    """

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)
    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')
    file_name = TextField(default='')
    archive_url = TextField(default='')
    # The defaults must be real dates: '' and 0 cannot be stored in a date
    # column. These are the same defaults the news_check model declares.
    pub_date = DateField(default=datetime.date.fromtimestamp(0))
    download_date = DateField(default=datetime.date.today)
    slack_ts = FloatField(default=0)  # should be a fixed-length string but float is easier to sort by
    sent = BooleanField(default=False)
    archived_by = CharField(default=os.getenv("UNAME"))
    # need to know who saved the message because the file needs to be on their computer in order to get verified
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default='')
    verified = IntegerField(default=0)  # 0 = not verified, 1 = verified, -1 = marked as bad

    def set_authors(self, authors):
        """Create one ``ArticleAuthor`` row per name in *authors*.

        Names of 100 characters or more are skipped.
        NOTE(review): presumably these are scraping artefacts rather than
        real names; confirm that dropping them silently is intended.
        """
        for a in authors:
            if len(a) < 100:
                ArticleAuthor.create(
                    article=self,
                    author=a,
                )

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per file name in *related*."""
        for r in related:
            ArticleRelated.create(
                article=self,
                related_file_name=r,
            )

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db
|  |  | ||||||
|  |     | ||||||
|  |  | ||||||
class ArticleAuthor(NEWModel):
    """One author of a migrated article; exposed as ``article.authors``."""

    article = ForeignKeyField(ArticleDownload, backref='authors')
    author = CharField()
|  |  | ||||||
|  |  | ||||||
class ArticleRelated(NEWModel):
    """A file related to an article (full text of a paper, audio files, etc.).

    Exposed on the article as ``article.related``.
    """

    article = ForeignKeyField(ArticleDownload, backref='related')
    related_file_name = TextField(default='')
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #################################################################### | ||||||
|  | # Migrate using sensible defaults: | ||||||
download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated])

# Placeholder date for legacy rows whose pub_date is an empty string.
EPOCH = datetime.date.fromtimestamp(0)

for it, old_art in enumerate(OLDArticleDownload.select(), start=1):
    # Collect the values once so the log line and the insert cannot drift apart.
    migrated = {
        "article_url": old_art.article_url,
        "title": old_art.title,
        "summary": old_art.summary,
        "source_name": old_art.source_name,
        "language": old_art.language,
        "file_name": old_art.file_name,
        "archive_url": old_art.archive_url,
        "pub_date": old_art.pub_date if old_art.pub_date != "" else EPOCH,
        "download_date": old_art.download_date,
        # slack_ts and archived_by keep their model defaults
        "sent": True,
        "comment": old_art.comment,
        "verified": old_art.verified,
    }

    print("====================================================================")
    print(f"IT {it} New article with data:")
    print(*migrated.values())

    # Insert the article together with its authors and related files.
    # Without the transaction, a failure while writing the children would
    # leave the article row behind, and the unique article_url would then
    # make a re-run of the migration fail on that row.
    with download_db.atomic():
        new_art = ArticleDownload.create(**migrated)
        new_art.set_related([r.related_file_name for r in old_art.related])
        new_art.set_authors([a.author for a in old_art.authors])
|  |      | ||||||
| @@ -2,16 +2,38 @@ | |||||||
| 	import PDFView from './PDFView.svelte'; | 	import PDFView from './PDFView.svelte'; | ||||||
| 	import ArticleStatus from './ArticleStatus.svelte'; | 	import ArticleStatus from './ArticleStatus.svelte'; | ||||||
| 	import ArticleOperations from './ArticleOperations.svelte'; | 	import ArticleOperations from './ArticleOperations.svelte'; | ||||||
|  |  | ||||||
|  | 	let current_id = 0;  | ||||||
|  | 	 | ||||||
|  | 	const updateInterface = (async () => { | ||||||
|  | 		let url = ''; | ||||||
|  | 		if (current_id == 0) { | ||||||
|  | 			url = '/api/article/first'; | ||||||
|  | 		} else { | ||||||
|  | 			url = '/api/article/' + current_id + '/next'; | ||||||
|  | 		} | ||||||
|  | 		const response = await fetch(url) | ||||||
|  | 		const data = await response.json() | ||||||
|  | 		current_id = data.id; | ||||||
|  | 		let article_url = '/api/article/' + current_id + '/get'; | ||||||
|  | 		const article_response = await fetch(article_url); | ||||||
|  | 		const article_data = await article_response.json(); | ||||||
|  | 		return article_data; | ||||||
|  | 	})() | ||||||
|  | 	 | ||||||
|  | 	 | ||||||
| </script> | </script> | ||||||
|  |  | ||||||
|  | {#await updateInterface} | ||||||
| <div class="flex w-full h-full gap-5 p-5"> | ... | ||||||
| 	<div class="w-3/5"><PDFView/></div> | {:then article_data} | ||||||
|  | 	<div class="flex w-full h-screen gap-5 p-5"> | ||||||
|  | 		<div class="w-3/5"><PDFView article_data={article_data}/></div> | ||||||
| 		<div class="divider divider-horizontal"></div>  | 		<div class="divider divider-horizontal"></div>  | ||||||
| 		<div class="w-2/5"> | 		<div class="w-2/5"> | ||||||
| 		<ArticleStatus article_id={42}/> | 			<ArticleStatus article_data={article_data}/> | ||||||
| 			<div class="divider divider-vertical"></div>  | 			<div class="divider divider-vertical"></div>  | ||||||
| 		<ArticleOperations/> | 			<ArticleOperations article_data={article_data}/> | ||||||
| 		</div> | 		</div> | ||||||
| 	</div> | 	</div> | ||||||
|  | {/await} | ||||||
|   | |||||||
| @@ -1,28 +1,93 @@ | |||||||
| <div class="toast"> | <script> | ||||||
| 	<div class="alert alert-info"> |     import {fade} from 'svelte/transition'; | ||||||
| 	    <div> |  | ||||||
|             <span>New message arrived.</span> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
| </div> |  | ||||||
|      |      | ||||||
| <div class="grid grid-cols-3 gap-4"> |     export let article_data; | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|     <div class="highlight">01</div> |  | ||||||
|      |      | ||||||
| </div> |     const actions = [ | ||||||
| <style> |       {name: 'Mark as good (and skip to next)', kbd: 'A'}, | ||||||
|     .highlight { |       {name: 'Mark as bad (and skip to next)', kbd: 'B'}, | ||||||
|         background-color: #f5f5f5; |       {name: 'Upload related file', kbd: 'R'}, | ||||||
|         border-radius: 5px; |       {name: 'Skip', kbd: 'ctrl'}, | ||||||
|         padding: 10px; |     ] | ||||||
|         margin: 10px; |  | ||||||
|  |     const toast_states = { | ||||||
|  |       'success' : {class: 'alert-success', text: 'Article updated successfully'}, | ||||||
|  |       'error' : {class: 'alert-error', text: 'Article update failed'}, | ||||||
|     } |     } | ||||||
| </style> |     let toast_state = {}; | ||||||
|  |     let toast_visible = false; | ||||||
|  |  | ||||||
|  |      | ||||||
|  |     function onKeyDown(e) {apiAction(e.key)} | ||||||
|  |     function apiAction(key) { | ||||||
|  |       if (actions.map(d => d.kbd.toLowerCase()).includes(key.toLowerCase())){ // ignore other keypresses | ||||||
|  |  | ||||||
|  |         const updateArticle = (async() => { | ||||||
|  |           const response = await fetch('/api/article/' + article_data.id + '/set', { | ||||||
|  |             method: 'POST', | ||||||
|  |             headers: {'Content-Type': 'application/json'}, | ||||||
|  |             body: JSON.stringify({ | ||||||
|  |               'action': key.toLowerCase(), | ||||||
|  |             }) | ||||||
|  |           }) | ||||||
|  |           const success = response.status == 200; | ||||||
|  |          | ||||||
|  |           if (success){ | ||||||
|  |             showToast('success'); | ||||||
|  |           } else { | ||||||
|  |             showToast('error'); | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |         })() | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     function showToast(state){   | ||||||
|  |       toast_visible = true; | ||||||
|  |       toast_state = toast_states[state]; | ||||||
|  |       setTimeout(() => { | ||||||
|  |         toast_visible = false; | ||||||
|  |       }, 1000) | ||||||
|  |  | ||||||
|  |     } | ||||||
|  | </script> | ||||||
|  |  | ||||||
|  |  | ||||||
|  | <div class="card bg-neutral-300 shadow-xl"> | ||||||
|  |     <div class="card-body"> | ||||||
|  |       <h2 class="card-title">Your options: (click on action or use keyboard)</h2> | ||||||
|  |       <div class="overflow-x-auto"> | ||||||
|  |         <table class="table w-full table-compact"> | ||||||
|  |           <!-- head --> | ||||||
|  |           <thead> | ||||||
|  |             <tr> | ||||||
|  |                 <th>Action</th> | ||||||
|  |                 <th>Keyboard shortcut</th> | ||||||
|  |             </tr> | ||||||
|  |           </thead> | ||||||
|  |           <tbody> | ||||||
|  |             {#each actions as action} | ||||||
|  |                  | ||||||
|  |                 <tr> | ||||||
|  |                     <td><button on:click={() => apiAction(action.kbd)}>{ action.name }</button></td> | ||||||
|  |                     <td><kbd class="kbd">{ action.kbd }</kbd></td> | ||||||
|  |                 </tr> | ||||||
|  |              | ||||||
|  |             {/each} | ||||||
|  |           </tbody> | ||||||
|  |         </table> | ||||||
|  |       </div> | ||||||
|  |     </div> | ||||||
|  | </div> | ||||||
|  |  | ||||||
|  | <svelte:window on:keydown|preventDefault={onKeyDown} /> | ||||||
|  |  | ||||||
|  | {#if toast_visible} | ||||||
|  | <div class="toast" transition:fade> | ||||||
|  |   <div class="alert { toast_state.class }"> | ||||||
|  |       <div> | ||||||
|  |       <span>{ toast_state.text }.</span> | ||||||
|  |       </div> | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  | {/if} | ||||||
| @@ -1,25 +1,38 @@ | |||||||
| <script> | <script> | ||||||
|     export let article_id; |     export let article_data; | ||||||
|     const Article = (async () => { |     const status_items = [ | ||||||
|     const response = await fetch('/api/article/' + article_id + '/get') |         {name: 'Title', value: article_data.title}, | ||||||
|     return await response.json() |         {name: 'Filename', value: article_data.file_name}, | ||||||
| 	})() |         {name: 'Language', value: article_data.language}, | ||||||
|     console.log(Article) |         {name: 'Authors', value: article_data.authors}, | ||||||
|  |         {name: "Related", value: article_data.related}, | ||||||
|  |     ] | ||||||
| </script> | </script> | ||||||
|  |  | ||||||
| <div class="mockup-window border bg-base-300"> | <div class="card bg-neutral-300 shadow-xl overflow-x-auto"> | ||||||
|     <h1 class="center">Article overview</h1> |     <div class="card-body"> | ||||||
|     <ul tabindex="0" class="menu p-2 shadow bg-base-100 rounded-box w-52"> |       <h2 class="card-title">Article overview:</h2> | ||||||
|         {#await Article} |         <table class="table w-full table-compact" style="table-layout: fixed"> | ||||||
|             <li>...waiting</li> |           <thead> | ||||||
|         {:then data} |             <tr> | ||||||
|             <li><a href="#">{data.value}</a></li> |               <th>Attribute</th> | ||||||
|             <li><a href="#">Item 2</a></li> |               <th>Value</th> | ||||||
|         {:catch error} |             </tr> | ||||||
|             <li>An error occurred!</li> |           </thead> | ||||||
|         {/await} |           <tbody> | ||||||
|      |             {#each status_items as item} | ||||||
|          |                 <tr> | ||||||
|     </ul> |                     <td>{ item.name }</td> | ||||||
|  |                     <!-- <td>Quality Control Specialist</td> --> | ||||||
|  |                     {#if item.value != ""} | ||||||
|  |                         <td class='bg-emerald-200' style="white-space: normal">{ item.value }</td> | ||||||
|  |                     {:else} | ||||||
|  |                         <td class='bg-red-200'>{ item.value }</td> | ||||||
|  |                     {/if} | ||||||
|  |                 </tr> | ||||||
|  |             {/each} | ||||||
|  |           </tbody> | ||||||
|  |         </table> | ||||||
|  |       </div> | ||||||
|      |      | ||||||
| </div> | </div> | ||||||
| @@ -1,64 +1,10 @@ | |||||||
| <!--  |  | ||||||
| <script> |  | ||||||
| 	var myState = { |  | ||||||
| 		pdf: null, |  | ||||||
| 		currentPage: 1, |  | ||||||
| 		zoom: 1 |  | ||||||
| 	} |  | ||||||
|    |  | ||||||
| 	pdfjsLib.getDocument('test.pdf').then((pdf) => { |  | ||||||
|    |  | ||||||
| 		myState.pdf = pdf; |  | ||||||
| 		render(); |  | ||||||
|  |  | ||||||
| 	}); |  | ||||||
|  |  | ||||||
| 	function render() { |  | ||||||
| 		myState.pdf.getPage(myState.currentPage).then((page) => { |  | ||||||
| 	   |  | ||||||
| 			var canvas = document.getElementById("pdf_renderer"); |  | ||||||
| 			var ctx = canvas.getContext('2d'); |  | ||||||
|    |  | ||||||
| 			var viewport = page.getViewport(myState.zoom); |  | ||||||
|  |  | ||||||
| 			canvas.width = viewport.width; |  | ||||||
| 			canvas.height = viewport.height; |  | ||||||
| 	   |  | ||||||
| 			page.render({ |  | ||||||
| 				canvasContext: ctx, |  | ||||||
| 				viewport: viewport |  | ||||||
| 			}); |  | ||||||
| 		}); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| </script> |  | ||||||
|   --> |  | ||||||
| <!-- <div id="my_pdf_viewer"> |  | ||||||
|     <div class="mockup-window border bg-base-300"> |  | ||||||
|         <div id="canvas_container" class="flex justify-center"> |  | ||||||
|             <canvas id="pdf_renderer"></canvas> |  | ||||||
|         </div> |  | ||||||
|     </div> |  | ||||||
|          |  | ||||||
|  |  | ||||||
|     <div id="navigation_controls"> |  | ||||||
|         <button id="go_previous">Previous</button> |  | ||||||
|         <input id="current_page" value="1" type="number"/> |  | ||||||
|         <button id="go_next">Next</button> |  | ||||||
|     </div> |  | ||||||
|  |  | ||||||
|     <div id="zoom_controls">   |  | ||||||
|         <button id="zoom_in">+</button> |  | ||||||
|         <button id="zoom_out">-</button> |  | ||||||
|     </div> |  | ||||||
| </div> --> |  | ||||||
|  |  | ||||||
| <script> | <script> | ||||||
| 	let pdf_file = 'test.pdf'; | 	export let article_data; | ||||||
| </script>  | </script>  | ||||||
|  |  | ||||||
| <div class="mockup-window border bg-base-300 h-full w-full"> | <div class="h-full w-full shadow-xl"> | ||||||
| 	<object class="pdf-view" data="{pdf_file}" title="Article PDF"> </object> | 	<object class="pdf-view" data="{article_data.save_path + article_data.file_name}" title="Article PDF"> </object> | ||||||
| </div> | </div> | ||||||
|  |  | ||||||
| <style> | <style> | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| import json | from flask import Flask, send_from_directory, request | ||||||
| from flask import Flask, send_from_directory, jsonify | import configuration | ||||||
| import random | models = configuration.models | ||||||
|  | db = configuration.db | ||||||
| app = Flask(__name__) | app = Flask(__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -9,26 +9,55 @@ app = Flask(__name__) | |||||||
| # SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm) | # SVELTE 'STATIC' BACKEND. Always send index.html and the requested js-files. (compiled by npm) | ||||||
|  |  | ||||||
@app.route("/")  # index.html
def index():
    """Serve the entry page of the compiled client."""
    public_dir = '../client/public'
    return send_from_directory(public_dir, 'index.html')
@app.route("/<path:path>")  # js-files
def js(path):
    """Serve any other requested file from the client's public directory."""
    public_dir = '../client/public'
    return send_from_directory(public_dir, path)
@app.route("/app/containerdata/files/<path:path>")
def static_pdf(path):
    """Serve a stored file from the container's files directory.

    The URL mirrors the on-disk location, so a path built from an article's
    ``save_path`` and ``file_name`` can be requested directly.
    """
    files_dir = '/app/containerdata/files/'
    return send_from_directory(files_dir, path)
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ############################################################################### | ############################################################################### | ||||||
| # API for news_check. | # (simple) API for news_check. | ||||||
|  |  | ||||||
@app.route("/api/article/<int:id>/get")
def get_article_by_id(id):
    """Return the article with primary key *id* as a JSON object.

    Responds with 404 and an ``error`` message when no such article exists,
    instead of letting the lookup raise and surface as a 500.
    """
    with db:
        article = models.ArticleDownload.get_or_none(models.ArticleDownload.id == id)
        if article is None:
            return {"error": f"article {id} not found"}, 404
        # to_dict follows the authors/related back-references, so it has to
        # run while the connection is still open.
        return article.to_dict()
|  |  | ||||||
@app.route("/api/article/first")
def get_article_first():
    """Return the id of the unverified article (``verified == 0``) with the lowest id.

    Responds with 404 and ``{"id": None, ...}`` when every article has already
    been checked; previously this case raised AttributeError on ``None.id``.
    """
    with db:
        article = (
            models.ArticleDownload
            .select(models.ArticleDownload.id)
            .where(models.ArticleDownload.verified == 0)
            .order_by(models.ArticleDownload.id)
            .first()
        )
    if article is None:
        return {"id": None, "error": "no unverified articles left"}, 404
    return {"id": article.id}
|  |  | ||||||
@app.route("/api/article/<int:id>/next")
def get_article_next(id):
    """Return the id of the article to show after article *id*.

    If the directly following id exists and is still unverified it is
    returned; otherwise the result of ``get_article_first`` is returned.
    """
    with db:
        # get_or_none instead of get_by_id: id + 1 does not exist after the
        # last article or where rows are missing, and must not raise.
        candidate = models.ArticleDownload.get_or_none(models.ArticleDownload.id == id + 1)
    if candidate is not None and candidate.verified == 0:
        return {"id": id + 1}
    return get_article_first()
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
@app.route("/api/article/<int:id>/set", methods=['POST'])
def set_article(id):
    """Apply a review action to article *id*.

    Expects a JSON body ``{"action": <key>}``:
      * ``"a"`` marks the article as verified (``verified = 1``)
      * ``"b"`` marks it as bad (``verified = -1``)
      * ``"r"`` (attach related file) is rejected with 501, see below
      * any other action leaves the article unchanged

    Returns ``"ok"`` on success, 400 for a missing action, 404 for an unknown
    article id.
    """
    payload = request.get_json(silent=True) or {}
    action = payload.get('action')
    if action is None:
        return {"error": "request body must be JSON with an 'action' field"}, 400

    if action == "r":
        # The request carries no file names, so there is nothing to hand to
        # ArticleDownload.set_related(related); calling it without arguments
        # raised TypeError. Report the missing feature explicitly instead.
        return {"error": "attaching related files is not implemented yet"}, 501

    with db:
        article = models.ArticleDownload.get_or_none(models.ArticleDownload.id == id)
        if article is None:
            return {"error": f"article {id} not found"}, 404

        if action == "a":
            article.verified = 1
        elif action == "b":
            article.verified = -1
        article.save()
        return "ok"
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										16
									
								
								news_check/server/configuration.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								news_check/server/configuration.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | |||||||
|  | from peewee import PostgresqlDatabase | ||||||
|  | import configparser | ||||||
|  |  | ||||||
# General settings, read from the news_fetch config file.
main_config = configparser.ConfigParser()
main_config.read("/app/containerdata/config/news_fetch.config.ini")

# Database credentials are kept in a separate file.
db_config = configparser.ConfigParser()
db_config.read("/app/containerdata/config/db.config.ini")

cred = db_config["DATABASE"]
db = PostgresqlDatabase(
    cred["db_name"],
    user=cred["user_name"],
    password=cred["password"],
    host="vpn",
    port=5432,
)

# Imported only now, after main_config and db exist: models itself imports
# this module and reads main_config at import time.
import models
models.set_db(db)
							
								
								
									
										134
									
								
								news_check/server/models.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								news_check/server/models.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | |||||||
|  | import logging | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
|  | from peewee import * | ||||||
|  | import os | ||||||
|  | import datetime | ||||||
|  | import configuration | ||||||
|  |  | ||||||
|  | config = configuration.main_config["DOWNLOADS"] | ||||||
|  |  | ||||||
|  | # set the nature of the db at runtime | ||||||
|  | download_db = DatabaseProxy() | ||||||
|  |  | ||||||
|  |  | ||||||
class DownloadBaseModel(Model):
    """Base class for all models; bound to the ``download_db`` proxy."""

    class Meta:
        database = download_db
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## == Article related models == ## | ||||||
class ArticleDownload(DownloadBaseModel):
    """An archived article together with its download and verification state.

    Authors and related files live in ``ArticleAuthor`` / ``ArticleRelated``
    and are reachable through the ``authors`` / ``related`` back-references.
    """

    # in the beginning this is all we have
    article_url = TextField(default='', unique=True)

    # fetch then fills in the metadata
    title = TextField(default='')
    summary = TextField(default='')
    source_name = CharField(default='')
    language = CharField(default='')
    file_name = TextField(default='')

    archive_url = TextField(default='')
    pub_date = DateField(default=datetime.date.fromtimestamp(0))
    download_date = DateField(default=datetime.date.today)

    slack_ts = FloatField(default=0)  # should be a fixed-length string but float is easier to sort by

    sent = BooleanField(default=False)

    archived_by = CharField(default=os.getenv("UNAME"))
    # need to know who saved the message because the file needs to be on their computer in order to get verified
    # verification happens in a different app, but the model has the fields here as well
    comment = TextField(default='')
    verified = IntegerField(default=0)  # 0 = not verified, 1 = verified, -1 = marked as bad

    # authors
    # keywords
    # ... are added through foreignkeys
    # we will also add an attribute named message, to reference which message should be replied to. This attribute does not need to be saved in the db

    @property
    def save_path(self):
        """Local directory (with trailing slash) holding this article's file.

        Built as ``<local_storage_path>/<year>/<month name>/`` from the
        download date.
        """
        return f"{config['local_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/"

    @property
    def fname_nas(self):
        """Display string for the file's location on the NAS, or None without a download date.

        The getter used to declare a ``file_name`` parameter, but a property
        is only ever called with ``self``, so that branch could never run.
        """
        if not self.download_date:
            return None
        return f"NAS: {config['remote_storage_path']}/{self.download_date.year}/{self.download_date.strftime('%B')}/{self.file_name}"

    def to_dict(self):
        """Return the article as a JSON-serialisable dict (dates as ``YYYY-MM-DD``)."""
        return {
            "id": self.id,
            "article_url": self.article_url,
            "title": self.title,
            "summary": self.summary,
            "source_name": self.source_name,
            "language": self.language,
            "file_name": self.file_name,
            "save_path": self.save_path,
            "fname_nas": self.fname_nas,
            "archive_url": self.archive_url,
            "pub_date": self.pub_date.strftime("%Y-%m-%d"),
            "download_date": self.download_date.strftime("%Y-%m-%d"),
            "sent": self.sent,
            "comment": self.comment,
            "related": [r.related_file_name for r in self.related],
            "authors": [a.author for a in self.authors],
        }

    def set_related(self, related):
        """Create one ``ArticleRelated`` row per file name in *related*.

        Raises:
            ValueError: if a file name is longer than 255 characters. Names
                earlier in *related* have already been created at that point.
        """
        for r in related:
            if len(r) > 255:
                # ValueError is still an Exception, so existing handlers keep working
                raise ValueError("Related file name too long for POSTGRES")

            ArticleRelated.create(
                article=self,
                related_file_name=r,
            )

    def file_status(self):
        """Check that the article's file exists on disk.

        Returns:
            ``(True, {})`` when the file is present, otherwise ``(False, info)``
            where ``info`` holds a ``reply_text`` explaining the problem and
            ``file_path`` set to None.
        """
        if not self.file_name:
            logger.error(f"Article {self} has no filename!")
            return False, {"reply_text": "Download failed, no file was saved.", "file_path": None}

        file_path_abs = self.save_path + self.file_name
        if not os.path.exists(file_path_abs):
            logger.error(f"Article {self} has a filename, but the file does not exist at that location!")
            return False, {"reply_text": "Can't find file. Either the download failed or the file was moved.", "file_path": None}

        return True, {}
|  |  | ||||||
|  |  | ||||||
|  | class ArticleAuthor(DownloadBaseModel): | ||||||
|  |     article = ForeignKeyField(ArticleDownload, backref='authors') | ||||||
|  |     author = CharField() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ArticleRelated(DownloadBaseModel): | ||||||
|  |     # Related files, such as the full text of a paper, audio files, etc. | ||||||
|  |     article = ForeignKeyField(ArticleDownload, backref='related') | ||||||
|  |     related_file_name = TextField(default = '') | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def set_db(download_db_object): | ||||||
|  |     download_db.initialize(download_db_object) | ||||||
|  |     with download_db: # create tables (does nothing if they exist already) | ||||||
|  |         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1,20 +0,0 @@ | |||||||
| import peewee |  | ||||||
|  |  | ||||||
| db = peewee.PostgresqlDatabase('coss_archiving', user='ca_rw', password='pleasechangeit', host='vpn', port=5432) |  | ||||||
| # db.connect() |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class Pet(peewee.Model): |  | ||||||
|     name = peewee.CharField() |  | ||||||
|     animal_type = peewee.CharField() |  | ||||||
|  |  | ||||||
|     class Meta: |  | ||||||
|         database = db # this model uses the "coss_archiving" database |  | ||||||
| with db: |  | ||||||
|     db.create_tables([Pet]) |  | ||||||
| db.get_tables() |  | ||||||
|  |  | ||||||
| t = Pet.create(name="Test", animal_type="test") |  | ||||||
|  |  | ||||||
| for pet in Pet.select(): |  | ||||||
|     print(pet.name) |  | ||||||
| @@ -8,3 +8,4 @@ newspaper3k | |||||||
| htmldate | htmldate | ||||||
| markdown | markdown | ||||||
| rich | rich | ||||||
|  | psycopg2 | ||||||
| @@ -123,7 +123,6 @@ class Coordinator(Thread): | |||||||
|         unsent = models.ArticleDownload.filter(sent = False) |         unsent = models.ArticleDownload.filter(sent = False) | ||||||
|         # .objects.filter(sent = False) |         # .objects.filter(sent = False) | ||||||
|         for a in unsent: |         for a in unsent: | ||||||
|             print(a) |  | ||||||
|             self.incoming_request(article=a) |             self.incoming_request(article=a) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -170,7 +169,7 @@ class Coordinator(Thread): | |||||||
|  |  | ||||||
|         for article in articles: |         for article in articles: | ||||||
|             notifier = lambda article: logger.info(f"Completed manual actions for {article}") |             notifier = lambda article: logger.info(f"Completed manual actions for {article}") | ||||||
|             ArticleWatcher(article, None, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg  |             ArticleWatcher(article, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg  | ||||||
|  |  | ||||||
|     def article_complete_notifier(self, article): |     def article_complete_notifier(self, article): | ||||||
|         if self.worker_slack is None: |         if self.worker_slack is None: | ||||||
| @@ -192,7 +191,7 @@ if __name__ == "__main__": | |||||||
|  |  | ||||||
|  |  | ||||||
|     if "upload" in sys.argv: |     if "upload" in sys.argv: | ||||||
|         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "").execute() |         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "" or models.ArticleDownload.archive_url == "TODO:UPLOAD").execute() | ||||||
|         logger.info(f"Launching upload to archive for {len(articles)} articles.") |         logger.info(f"Launching upload to archive for {len(articles)} articles.") | ||||||
|         coordinator.manual_processing(articles, [UploadWorker()]) |         coordinator.manual_processing(articles, [UploadWorker()]) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,7 +4,6 @@ logger = logging.getLogger(__name__) | |||||||
| from peewee import * | from peewee import * | ||||||
| import os | import os | ||||||
| import markdown | import markdown | ||||||
| import re |  | ||||||
| import configuration | import configuration | ||||||
| import datetime | import datetime | ||||||
|  |  | ||||||
| @@ -28,7 +27,7 @@ class ArticleDownload(DownloadBaseModel): | |||||||
|     article_url = TextField(default = '', unique=True) |     article_url = TextField(default = '', unique=True) | ||||||
|      |      | ||||||
|     # fetch then fills in the metadata |     # fetch then fills in the metadata | ||||||
|     title = CharField(default='') |     title = TextField(default='') | ||||||
|     @property |     @property | ||||||
|     def is_title_bad(self):  # add incrementally |     def is_title_bad(self):  # add incrementally | ||||||
|         return "PUR-Abo" in self.title \ |         return "PUR-Abo" in self.title \ | ||||||
| @@ -63,7 +62,7 @@ class ArticleDownload(DownloadBaseModel): | |||||||
|  |  | ||||||
|      |      | ||||||
|     archive_url = TextField(default = '') |     archive_url = TextField(default = '') | ||||||
|     pub_date = DateField(default = '') |     pub_date = DateField(default = datetime.date.fromtimestamp(0)) | ||||||
|     download_date = DateField(default = datetime.date.today) |     download_date = DateField(default = datetime.date.today) | ||||||
|  |  | ||||||
|     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by |     slack_ts = FloatField(default = 0) # should be a fixed-length string but float is easier to sort by | ||||||
| @@ -143,6 +142,7 @@ class ArticleDownload(DownloadBaseModel): | |||||||
|  |  | ||||||
|     def set_authors(self, authors): |     def set_authors(self, authors): | ||||||
|         for a in authors: |         for a in authors: | ||||||
|  |             if len(a) < 100: # otherwise it's a mismatched string | ||||||
|                 ArticleAuthor.create( |                 ArticleAuthor.create( | ||||||
|                     article = self, |                     article = self, | ||||||
|                     author = a |                     author = a | ||||||
| @@ -150,6 +150,9 @@ class ArticleDownload(DownloadBaseModel): | |||||||
|  |  | ||||||
|     def set_related(self, related): |     def set_related(self, related): | ||||||
|         for r in related: |         for r in related: | ||||||
|  |             if len(r) > 255: | ||||||
|  |                 raise Exception("Related file name too long for POSTGRES") | ||||||
|  |  | ||||||
|             ArticleRelated.create( |             ArticleRelated.create( | ||||||
|                 article = self, |                 article = self, | ||||||
|                 related_file_name = r |                 related_file_name = r | ||||||
| @@ -182,116 +185,7 @@ class ArticleRelated(DownloadBaseModel): | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # class Thread(ChatBaseModel): |  | ||||||
| #     """The threads that concern us are only created if the base message contains a url""" |  | ||||||
| #     thread_ts = FloatField(default = 0) |  | ||||||
| #     article = ForeignKeyField(ArticleDownload, backref="slack_thread", null=True, default=None) |  | ||||||
| #     # provides, ts, user, models |  | ||||||
| #     # messages |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def slack_ts(self): |  | ||||||
| #         str_ts = str(self.thread_ts) |  | ||||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, problem! |  | ||||||
| #         return "{}{}".format(str_ts, cut_zeros*"0") |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def initiator_message(self): |  | ||||||
| #         try: |  | ||||||
| #             return self.messages[0] # TODO check if this needs sorting |  | ||||||
| #         except IndexError: |  | ||||||
| #             logger.warning(f"Thread {self} is empty. How can that be?") |  | ||||||
| #             return None |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def message_count(self): |  | ||||||
| #         # logger.warning("message_count was called") |  | ||||||
| #         return self.messages.count() |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def last_message(self): |  | ||||||
| #         messages = Message.select().where(Message.thread == self).order_by(Message.ts) # can't be empty by definition/creation |  | ||||||
| #         return messages[-1] |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def is_fully_processed(self) -> bool: |  | ||||||
| #         init_message = self.initiator_message |  | ||||||
| #         if init_message is None: |  | ||||||
| #             return False |  | ||||||
|          |  | ||||||
| #         if init_message.is_processed_override: |  | ||||||
| #             return True |  | ||||||
| #         # this override is set for instance, when no url was sent at all. Then set this thread to be ignored |  | ||||||
|          |  | ||||||
| #         reactions = init_message.reaction |  | ||||||
| #         if not reactions: |  | ||||||
| #             return False |  | ||||||
| #         else: |  | ||||||
| #             r = reactions[0].type # can and should only have one reaction |  | ||||||
| #             return r == "white_check_mark" \ |  | ||||||
| #                 or r == "x" |  | ||||||
|  |  | ||||||
|  |  | ||||||
|      |  | ||||||
| # class Message(ChatBaseModel): |  | ||||||
| #     ts = FloatField(unique=True) #for sorting |  | ||||||
| #     channel_id = CharField(default='') |  | ||||||
| #     user = ForeignKeyField(User, backref="messages") |  | ||||||
| #     text = TextField(default='') |  | ||||||
| #     thread = ForeignKeyField(Thread, backref="messages", default=None) |  | ||||||
| #     file_type = CharField(default='') |  | ||||||
| #     perma_link = CharField(default='') |  | ||||||
| #     is_processed_override = BooleanField(default=False) |  | ||||||
| #     # reaction |  | ||||||
|  |  | ||||||
| #     def __str__(self) -> str: |  | ||||||
| #         return "MSG [{}]".format(shorten_name(self.text).replace('\n','/')) |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def slack_ts(self): |  | ||||||
| #         str_ts = str(self.ts) |  | ||||||
| #         cut_zeros = 6 - (len(str_ts) - str_ts.find(".") - 1) # usually there are 6 decimals. If there are fewer, problem! |  | ||||||
| #         return "{}{}".format(str_ts, cut_zeros * "0") |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #     @property |  | ||||||
| #     def urls(self): |  | ||||||
| #         pattern = r"<(.*?)>" |  | ||||||
| #         matches = re.findall(pattern, self.text) |  | ||||||
| #         matches = [m for m in matches if "." in m] |  | ||||||
|          |  | ||||||
| #         new_matches = [] |  | ||||||
| #         for m in matches: |  | ||||||
| #             if "." in m:  # must contain a tld, right? |  | ||||||
| #                 # further complication: slack automatically abbreviates urls in the format:  |  | ||||||
| #                 # <url|link preview>. Lucky for us, "|" is a character discouraged in urls, meaning we can "safely" split on it and retain the first half |  | ||||||
| #                 if "|" in m: |  | ||||||
| #                     keep = m.split("|")[0] |  | ||||||
| #                 else: |  | ||||||
| #                     keep = m |  | ||||||
| #                 new_matches.append(keep) |  | ||||||
| #         return new_matches |  | ||||||
|      |  | ||||||
| #     @property |  | ||||||
| #     def is_by_human(self): |  | ||||||
| #         return self.user.user_id != slack_config["bot_id"] |  | ||||||
|  |  | ||||||
|      |  | ||||||
| #     @property |  | ||||||
| #     def has_single_url(self): |  | ||||||
| #         return len(self.urls) == 1 |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def set_db(download_db_object): | def set_db(download_db_object): | ||||||
|     download_db.initialize(download_db_object) |     download_db.initialize(download_db_object) | ||||||
|     with download_db: # create tables (does nothing if they exist already) |     with download_db: # create tables (does nothing if they exist already) | ||||||
|         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) |         download_db.create_tables([ArticleDownload, ArticleAuthor, ArticleRelated]) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user