Better structure

.gitignore (1 line changed)
@@ -2,3 +2,4 @@
 
 *.pyc
 *.log
+__pycache__/

README.md (30 lines changed)
@@ -6,26 +6,25 @@ A utility to fetch article requests from slack and generate pdfs for them, fully
 ## Running
 ### How to run - auto archiving mode
 In this mode the program is launched as a docker container, in a headless mode. For persistence purposes a local storage volume is required, but that's it!
 
 `docker run -it -v <your storage>:/app/file_storage/ auto_news`
 
 You can specify additional parameters:
 
 `docker run -it -v <your storage>:/app/file_storage/ auto_news debug` runs with debug values (does not write to prod db, does not send mails)
-`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on past uploads to archive.
-`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. Be aware that it requires additional parameters in order to open guis on the host.
+`docker run -it -v <your storage>:/app/file_storage/ auto_news upload` catches up on incomplete uploads to archive.
 
+`docker run -it -v <your storage>:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check` lets you visually verify the downloaded files. The additional parameters are required in order to open guis on the host.
 
 
 ### How to run - development mode
 In this mode, a docker container is launched with an additional volume, the local code. You can test your code without the need to rebuild the image.
 
 `docker run -it -v <your storage>:/app/file_storage/ -v <your code>:/code/ --entry-point /bin/bash auto_news`
 You are dropped into a bash shell, in which you can navigate to the `/code` directory and then test live.
 
 
-% ### How to run - file checker mode
-% This mode requires the most access rights. You want to access all files and open gui programs.
-% `docker run -it -e DISPLAY=":0" --network host -v $XAUTHORITY:/root/.Xauthority -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code auto_news /bin/bash`
-% Similarly to the development mode, you can cd into code and run your checking duties.
 
 
 
 
 ## Building
@@ -41,6 +40,17 @@ where the `Dockerfile` has to be in the working directory
 
 ## Cheat-sheet Remy:
 
-docker run -it -e LIVECODE=TRUE -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/DOWNLOADS/auto_news/app:/code/ auto_news /bin/bash
+`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news`
 
-docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ auto_news
+`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -v /mnt/Data/COSS/auto_news/app:/code --entrypoint /bin/bash auto_news`
+
+
+`docker run -it -v /mnt/Data/COSS/DOWNLOADS/auto_news/container_data/:/app/file_storage/ -e DISPLAY=":0" --network host -v \$XAUTHORITY:/root/.Xauthority auto_news check`
+
+
+## Roadmap:
+
+[] automatically upload files to NAS
+[] handle paywalled sites like faz, spiegel, .. through their dedicated edu-sites
+...
@@ -7,7 +7,7 @@ logger = logging.getLogger(__name__)
 
 from utils_mail import runner as mail_runner
 from utils_slack import runner as slack_runner
-from utils.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker
+from utils_worker.workers import CompressWorker, DownloadWorker, FetchWorker, UploadWorker
 
 
 class ArticleWatcher:
@@ -174,7 +174,8 @@ if __name__ == "__main__":
         logger.info(f"Launching upload to archive for {len(urls)} urls.")
         coordinator.manual_processing(urls, [UploadWorker()])
     elif "check" in sys.argv:
-        logger.info("Not implemented yet.")
+        from utils_check import runner as check_runner
+        check_runner.verify_unchecked()
     else: # launch with full action
         kwargs = {
             "worker_download" : DownloadWorker(),

app/utils_check/runner.py (new file, 285 lines added)
from rich.console import Console
from rich.table import Table
from rich.columns import Columns
from rich.rule import Rule
console = Console()
hline = Rule(style="white")

import os
import subprocess
from slack_sdk import WebClient
import configuration
models = configuration.models

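# Convention for the ArticleDownload.verified flag used throughout this module:
#   0 = not yet checked, 1 = accepted/verified, -1 = rejected (bad download)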
u_options = {
    "ENTER" : "Accept PDF as is. It gets marked as verified",
    "D" : "set language to DE and set verified",
    "E" : "set language to EN and set verified",
    "O" : "set other language (prompted)",
    "R" : "set related files (prompted multiple times)",
    "B" : "reject and move to folder BAD",
    "L" : "leave file as is, do not send reaction"
}


bot_client = WebClient(
    token = configuration.parsed["SLACK"]["auth_token"]
)


def file_overview(file_url: str, file_attributes: list, options: dict) -> None:
    """Prints a neat overview of the current article"""
    file_table = Table(
        title = file_url,
        row_styles = ["white", "bright_black"],
        min_width = 150
    )

    file_table.add_column("Attribute", justify = "right", no_wrap = True)
    file_table.add_column("Value set by auto_news")
    file_table.add_column("Status", justify = "right")
    for attr in file_attributes:
        file_table.add_row(attr["name"], attr["value"], attr["status"])

    option_key = "\n".join([f"[[bold]{k}[/bold]]" for k in options.keys()])
    option_action = "\n".join([f"[italic]{k}[/italic]" for k in options.values()])
    columns = Columns([option_key, option_action])

    console.print(file_table)
    console.print("Your options:")
    console.print(columns)

def send_reaction_to_slack_thread(article, reaction):
    """Sends the verification status as a reaction to the associated slack thread. This will significantly decrease load times of the bot"""
    messages = models.Message.select().where(models.Message.text.contains(article.article_url))
    # TODO rewrite this shit
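    # More than 5 matching messages means the URL lookup is ambiguous,
    # so no reactions are sent rather than reacting to unrelated threads.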
    if len(messages) > 5:
        print("Found more than 5 messages. Aborting reactions...")
        return
    for m in messages:
        if not m.has_single_url:
            print("Found thread but won't send reaction because thread has multiple urls")
        else:
            ts = m.slack_ts
            bot_client.reactions_add(
                channel=configuration.parsed["SLACK"]["archive_id"],
                name=reaction,
                timestamp=ts
            )
            print("Sent reaction to message")


def prompt_language(query):
    not_set = True
    while not_set:
        uin = input("Set language (nation-code, 2 letters) ")
        if len(uin) != 2:
            print("Bad code, try again")
        else:
            not_set = False
            query.language = uin
            query.save()


def prompt_related(query):
    file_list = []
    finished = False
    while not finished:
        uin = input("Additional file for article? Type '1' to cancel ")
        if uin == "1":
            query.set_related(file_list)
            finished = True
        else:
            file_list.append(uin)


def prompt_new_fname(query):
    uin = input("New fname? ")
    old_fname = query.file_name
    query.file_name = uin
    query.verified = 1
    if old_fname != "":
        os.remove(query.save_path + old_fname)
    query.save()


def reject_article(article):
    article.verified = -1
    article.save()
    print("Article marked as bad")
    # also update the threads to not be monitored anymore
    send_reaction_to_slack_thread(article, "x")


def unreject_article(query):
    query.verified = 1
    query.save()
    # os.rename(badpdf, fname)
    print("File set to verified")


def accept_article(article, last_accepted):
    article.verified = 1
    article.save()
    print("Article accepted as GOOD")

    # also update the threads to not be monitored anymore
    send_reaction_to_slack_thread(article, "white_check_mark")

    """linked = None
    try:
        thread = message_models.Thread.get(id = last_accepted.id + 1)
        rel = message_models.get_referenced_articles(thread, article_models.ArticleDownload)
        assert len(rel) == 1 and rel[0] == article
        linked = thread
    except: # if the above, naive method (just increment by one), fails, resort to brute search.
        print("Bruteforcing search")
        for t in message_models.Thread.select():
            rel = message_models.get_referenced_articles(t, article_models.ArticleDownload)
            if len(rel) == 1 and rel[0] == article:
                linked = t
                break

    if linked:
        linked.initiator_message.is_processed_override = 1
        linked.initiator_message.save()
        print("Message overwritten to PROCESSED")

    else:
        print("No matching thread found")"""
    return "" # linked


def verify_unchecked():
    query = models.ArticleDownload.select().where(models.ArticleDownload.verified == 0).execute()
    last_linked = None

    for article in query:
        console.print(hline)
        core_info = []
        for e, name in zip([article.save_path, article.file_name, article.title, article.language], ["Save path", "File name", "Title", "Language"]):
            entry = {
                "status" : "[red]██[/red]" if (len(e) == 0 or e == -1) else "[green]██[/green]",
                "value" : e if len(e) != 0 else "not set",
                "name" : name
            }
            core_info.append(entry)

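        # Open the PDF in evince so the reviewer can look at it while answering
        # the prompt below; if the viewer cannot be spawned, skip this article.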
        try:
            subprocess.Popen(["evince", f"file://{os.path.join(article.save_path, article.file_name)}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # suppress evince gtk warnings
        except Exception as e:
            print(str(list((-1, e))))
            continue

        file_overview(
            file_url = article.article_url,
            file_attributes = core_info,
            options = u_options
        )

        proceed = False
        while not proceed:
            proceed = False
            uin = input("Choice ?").lower()
            if uin == "":
                last_linked = accept_article(article, last_linked) # last linked accelerates the whole process
                proceed = True
            elif uin == "d":
                article.language = "de"
                article.verified = 1
                article.save()
                proceed = True
            elif uin == "e":
                article.language = "en"
                article.verified = 1
                article.save()
                proceed = True
            elif uin == "o":
                prompt_language(article)
            elif uin == "r":
                prompt_related(article)
            elif uin == "b":
                reject_article(article)
                proceed = True
            elif uin == "l":
                # do nothing
                proceed = True
            else:
                print("Invalid input")


# def verify_bad():
#     b_options = {
#         "ENTER":"Accept pdf as fixed",
#         "B": "Keep pdf in BAD.",
#         "R" : "set related files (prompted multiple times)",
#         "C" : "Change the saved file-name and set as verified."
#         }
#     query = article_models.ArticleDownload.select().where(article_models.ArticleDownload.verified == -1).execute()

#     for q in query:
#         pdf = q.file_name
#         save_dir = get_save_path(q)
#         fname = save_dir + "BAD/" + pdf
#         try:
#             subprocess.call(["xdg-open", fname])
#         except:
#             print(f"[{testvar}██{testvar}] PDF moved:")
#             print(fname)
#             continue

#         status_pdf = f"{testvar}██{testvar}"
#         if "just a moment" in pdf:
#             status_pdf = f"{testvar}██{testvar}"

#         language = q.language
#         status_language = f"{testvar}██{testvar}"
#         if len(language) == 0:
#             status_language = f"{testvar}██{testvar}"


#         print_status_options(
#             status=u_status.format(
#                 url = q.article_url,
#                 status_pdf = status_pdf,
#                 pdf = pdf[:80],
#                 status_language = status_language,
#                 language = language
#             ),
#             options = b_options)


#         proceed = False
#         while not proceed:
#             proceed = False
#             uin = input("Choice? ").lower()
#             if uin == "":
#                 unreject_article(q)
#                 proceed = True
#             elif uin == "b":
#                 proceed = True
#             elif uin == "r":
#                 prompt_related(q)
#             elif uin == "c":
#                 prompt_new_fname(q)
#                 proceed = True
#             else:
#                 print("Invalid input")

@@ -31,10 +31,3 @@ def shrink_pdf(article):
         logger.error(f"Could not run the compression! {c.stderr.decode()} - {c.stdout.decode()}")
     
     return article
-
-
-
-
-
-# gs -sDEVICE=pdfwrite -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH -sOutputFile=out.pdf 
-# ; mv -f temp.pdf file.pdf
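For context, the comment removed above documented the underlying ghostscript invocation used for compression. The actual `shrink_pdf` implementation is only partially visible in this hunk, so the helper below is a rough, self-contained sketch: its name, paths and error handling are assumptions, and only the `gs` flags come from the removed comment.

import subprocess
from pathlib import Path

def compress_pdf(src: Path) -> None:
    """Rewrite src in place using ghostscript's /screen preset (illustrative sketch)."""
    tmp = src.with_suffix(".tmp.pdf")
    c = subprocess.run(
        ["gs", "-sDEVICE=pdfwrite", "-dPDFSETTINGS=/screen",
         "-dNOPAUSE", "-dBATCH", f"-sOutputFile={tmp}", str(src)],
        capture_output=True,
    )
    if c.returncode != 0:
        # mirror the error reporting visible in the hunk above
        print(f"Could not run the compression! {c.stderr.decode()} - {c.stdout.decode()}")
        return
    tmp.replace(src)  # equivalent of the `mv -f temp.pdf file.pdf` in the removed comment
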
@@ -29,7 +29,7 @@ class TemplateWorker(Thread):
                 time.sleep(5)
             else:
                 article_watcher = self._article_queue.pop(0)
-                self.logger.info(f"{self.__class__.__name__} is now processing an article")
+                self.logger.info(f"{self.__class__.__name__} is now processing article ({len(self._article_queue)} in queue)")
                 self._handle_article(article_watcher)
                 

@@ -1,38 +0,0 @@
-import logging
-import keys
-from peewee import SqliteDatabase
-
-from persistence import article_models
-from archiving_utils import runner as archive_runner
-from mail_utils import runner as mail_runner
-
-# Global logger setup:
-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
-logger = logging.getLogger("MailThread")
-
-
-# Constant values...
-DOWNLOADS_DB = "/app/file_storage/downloads.db"
-
-
-# DB Setup:
-article_models.set_db(SqliteDatabase(
-    DOWNLOADS_DB,
-    pragmas = {'journal_mode': 'wal'} # multiple threads can access at once
-))
-
-
-mail_worker = mail_runner.MailSender(keys.MAIL_UNAME, keys.MAIL_PASSWORD, keys.MAIL_SENDER, keys.MAIL_RECIPIENT)
-dl_worker = archive_runner.ArchivingThread(article_models, mail_worker)
-dl_worker.start()
-
-
-# Retroactively sends a message to DIRK for messages that were archived using slack, but when the mail-reply was not yet implemented
-
-
-url_list = []
-
-for url in url_list:
-    dl_worker.get_or_save(url)