update nas target, documentation
This commit is contained in:
		
							
								
								
									
										8
									
								
								config/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								config/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | |||||||
|  | ## Configuration: example | ||||||
|  | The files inside this directory (not the ones in `env/`) are a sample of the required configuration. | ||||||
|  |  | ||||||
|  | Please create a copy of these files under `<location of downloads>/config/...`. | ||||||
|  |  | ||||||
|  | > Note: | ||||||
|  | > | ||||||
|  | > Some of the fields are blank; please fill them in as needed. | ||||||
							
								
								
									
										0
									
								
								env/debug → config/env/debug
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										0
									
								
								env/debug → config/env/debug
									
									
									
									
										vendored
									
									
								
							
							
								
								
									
										0
									
								
								env/production → config/env/production
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										0
									
								
								env/production → config/env/production
									
									
									
									
										vendored
									
									
								
							| @@ -25,7 +25,7 @@ db_printout: /app/containerdata/backups | |||||||
| local_storage_path: /app/containerdata/files | local_storage_path: /app/containerdata/files | ||||||
| debug_storage_path: /app/containerdata/debug/ | debug_storage_path: /app/containerdata/debug/ | ||||||
| default_download_path: /app/containerdata/tmp | default_download_path: /app/containerdata/tmp | ||||||
| remote_storage_path: /helbing_support/Files RM/Archiving | remote_storage_path: /helbing_support/Archiving-Pipeline | ||||||
| browser_profile_path: /app/containerdata/dependencies/news_fetch.profile | browser_profile_path: /app/containerdata/dependencies/news_fetch.profile | ||||||
| # please keep this exact name | # please keep this exact name | ||||||
| browser_print_delay: 3 | browser_print_delay: 3 | ||||||
| @@ -28,7 +28,7 @@ services: | |||||||
|       - ${CONTAINER_DATA}/config/nas_sync.config:/sync/nas_sync.config |       - ${CONTAINER_DATA}/config/nas_sync.config:/sync/nas_sync.config | ||||||
|       - ${CONTAINER_DATA}/config/nas_login.config:/sync/nas_login.config |       - ${CONTAINER_DATA}/config/nas_login.config:/sync/nas_login.config | ||||||
|     command: |     command: | ||||||
|       - nas22.ethz.ch/gess_coss_1/helbing_support/Files RM/Archiving/TEST # first command is the target mount path |       - nas22.ethz.ch/gess_coss_1/helbing_support/Archiving-Pipeline # first command is the target mount path | ||||||
|       - lsyncd |       - lsyncd | ||||||
|       - /sync/nas_sync.config |       - /sync/nas_sync.config | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										7
									
								
								manual/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								manual/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | |||||||
|  | ### MANUAL TASKS | ||||||
|  |  | ||||||
|  | The files inside this directory contain scripts for repetitive but somewhat automatable tasks. | ||||||
|  |  | ||||||
|  | > ⚠️ Warning: | ||||||
|  | >  | ||||||
|  | > Most scripts still require manual intervention before and after running, and probably require changes to the code. **Please make sure you understand them before using them!** | ||||||
							
								
								
									
										21
									
								
								manual/batch_archive.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								manual/batch_archive.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | |||||||
|  | """ | ||||||
|  | Saves websites specified in 'batch_urls.txt' to the Wayback Machine. Outputs the archive URLs to the terminal. | ||||||
|  | Hint: use 'python batch_archive.py > batch_archive.txt' to save the output to a file | ||||||
|  | """ | ||||||
|  | from waybackpy import WaybackMachineSaveAPI # upload to archive.org | ||||||
|  | import time | ||||||
|  |  | ||||||
|  | urls = [] | ||||||
|  | with open ("batch_urls.txt", "r") as f: | ||||||
|  |     urls = f.readlines() | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | for i, url in enumerate(urls): | ||||||
|  |     print(f"Saving url {i+1} / {len(urls)}") | ||||||
|  |     user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" # needed? | ||||||
|  |     wayback = WaybackMachineSaveAPI(url, user_agent) | ||||||
|  |     archive_url = wayback.save() | ||||||
|  |     print(archive_url) | ||||||
|  |     time.sleep(20) | ||||||
|  |     # Uploads to archive.org are rate limited | ||||||
							
								
								
									
										18
									
								
								manual/batch_urls.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								manual/batch_urls.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | |||||||
|  | https://id2020.org | ||||||
|  | https://www.weforum.org/platforms/the-centre-for-cybersecurity | ||||||
|  | https://www.unhcr.org/blogs/wp-content/uploads/sites/48/2018/04/fs.pdf | ||||||
|  | https://en.wikipedia.org/wiki/Social_Credit_System | ||||||
|  | https://en.wikipedia.org/wiki/Customer_lifetime_value | ||||||
|  | https://www.weforum.org/reports/the-internet-of-bodies-is-here-tackling-new-challenges-of-technology-governance | ||||||
|  | https://www.un.org/en/about-us/universal-declaration-of-human-rights | ||||||
|  | https://www.biometricupdate.com/201909/id2020-and-partners-launch-program-to-provide-digital-id-with-vaccines | ||||||
|  | https://www.wired.com/2008/06/pb-theory/ | ||||||
|  | https://www.medtechdive.com/news/fda-warns-of-false-positives-with-bd-coronavirus-diagnostic/581115/ | ||||||
|  | https://www.bbc.com/news/world-middle-east-52579475 | ||||||
|  | https://www.timesofisrael.com/over-12000-mistakenly-quarantined-by-phone-tracking-health-ministry-admits/ | ||||||
|  | https://www.delftdesignforvalues.nl | ||||||
|  | https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/ | ||||||
|  | https://link.springer.com/chapter/10.1007/978-3-319-90869-4_17 | ||||||
|  | https://www.youtube.com/watch?v=_KhAsJRk2lo | ||||||
|  | https://www.bloomberg.org/environment/supporting-sustainable-cities/american-cities-climate-challenge/ | ||||||
|  | https://climatecitycup.org | ||||||
							
								
								
									
										33
									
								
								manual/batch_youtube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								manual/batch_youtube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | |||||||
|  | """ | ||||||
|  | Saves youtube videos specified in 'batch_urls.txt' to the local folder. (to be copied manually) | ||||||
|  | """ | ||||||
|  | import youtube_dl | ||||||
|  |  | ||||||
|  | urls = [] | ||||||
|  | with open ("batch_urls.txt", "r") as f: | ||||||
|  |     urls = f.readlines() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def post_download_hook(ret_code): | ||||||
|  |     if ret_code['status'] == 'finished': | ||||||
|  |         file_loc = ret_code["filename"] | ||||||
|  |         print(file_loc) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def save_video(url): | ||||||
|  |     """Saves video according to url and save path""" | ||||||
|  |     ydl_opts = { | ||||||
|  |         'format': 'best[height<=720]', | ||||||
|  |         'progress_hooks': [post_download_hook], | ||||||
|  |         'updatetime': False | ||||||
|  |     } | ||||||
|  |     try: | ||||||
|  |         with youtube_dl.YoutubeDL(ydl_opts) as ydl: | ||||||
|  |             ydl.download([url]) | ||||||
|  |     except Exception as e: | ||||||
|  |         print(f"Youtube download crashed: {e}") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | for i, url in enumerate(urls): | ||||||
|  |     print(f"Downloading video {i+1} / {len(urls)}") | ||||||
|  |     save_video(url) | ||||||
| @@ -1,3 +1,6 @@ | |||||||
|  | """ | ||||||
|  | Extracts all urls from a list of mails exported from thunderbird. Writes to 'mails_url_export.json' | ||||||
|  | """ | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import json | import json | ||||||
| @@ -19,5 +22,5 @@ for f in all_files: | |||||||
| 
 | 
 | ||||||
| print("Saved {} urls".format(len(all_urls))) | print("Saved {} urls".format(len(all_urls))) | ||||||
| 
 | 
 | ||||||
| with open("media_mails_export.json", "w") as f: | with open("mails_url_export.json", "w") as f: | ||||||
|     json.dump(all_urls, f)   |     json.dump(all_urls, f)   | ||||||
| @@ -1,5 +1,8 @@ | |||||||
|  | """ | ||||||
|  | Runs the news_fetch pipeline against a manually curated list of urls and saves them locally | ||||||
|  | """ | ||||||
| import sys | import sys | ||||||
| sys.path.append("../app") | sys.path.append("../app/news_fetch") | ||||||
| import runner | import runner | ||||||
| import logging | import logging | ||||||
| logger = logging.getLogger() | logger = logging.getLogger() | ||||||
| @@ -11,24 +14,18 @@ console = Console() | |||||||
| 
 | 
 | ||||||
| logger.info("Overwriting production values for single time media-fetch") | logger.info("Overwriting production values for single time media-fetch") | ||||||
| runner.configuration.models.set_db( | runner.configuration.models.set_db( | ||||||
|     runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"),  # chat_db (not needed here) |  | ||||||
|     runner.configuration.SqliteDatabase("../.dev/media_downloads.db") |     runner.configuration.SqliteDatabase("../.dev/media_downloads.db") | ||||||
| ) | ) | ||||||
| runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/" | runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def fetch(): | def fetch(): | ||||||
|     coordinator = runner.Coordinator() |     dispatcher = runner.Dispatcher() | ||||||
| 
 | 
 | ||||||
|  |     dispatcher.workers_in = [{"FetchWorker": runner.FetchWorker(), "DownloadWorker": runner.DownloadWorker()}] | ||||||
|  |     dispatcher.workers_out = [{"PrintWorker": runner.PrintWorker()}] | ||||||
| 
 | 
 | ||||||
|     kwargs = { |     dispatcher.start() | ||||||
|         "worker_download" : runner.DownloadWorker(), |  | ||||||
|         "worker_fetch" : runner.FetchWorker(), |  | ||||||
|         "worker_upload" : runner.UploadWorker(), |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     coordinator.add_workers(**kwargs) |  | ||||||
|     coordinator.start() |  | ||||||
| 
 | 
 | ||||||
|     with open("media_urls.json", "r") as f: |     with open("media_urls.json", "r") as f: | ||||||
|         url_list = json.loads(f.read())  |         url_list = json.loads(f.read())  | ||||||
| @@ -36,9 +33,8 @@ def fetch(): | |||||||
|     logger.info(f"Found {len(url_list)} media urls") |     logger.info(f"Found {len(url_list)} media urls") | ||||||
|     for u in url_list: |     for u in url_list: | ||||||
|         msg_text = f"<{u}|dummy preview text>" |         msg_text = f"<{u}|dummy preview text>" | ||||||
|         dummy_thread = runner.models.Thread() |         dispatcher.incoming_request(msg) | ||||||
|         msg = runner.models.Message(text= msg_text, thread=dummy_thread) | 
 | ||||||
|         coordinator.incoming_request(msg) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def show(): | def show(): | ||||||
| @@ -1,88 +0,0 @@ | |||||||
| import time |  | ||||||
| import keys |  | ||||||
| import slack_sdk |  | ||||||
| from slack_sdk.errors import SlackApiError |  | ||||||
| from peewee import SqliteDatabase |  | ||||||
|  |  | ||||||
| from persistence import  message_models |  | ||||||
| # from bot_utils import messages |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # Constant values... |  | ||||||
| MESSAGES_DB = "/app/containerdata/messages.db" |  | ||||||
|  |  | ||||||
| BOT_ID = "U02MR1R8UJH" |  | ||||||
| ARCHIVE_ID = "C02MM7YG1V4" |  | ||||||
| DEBUG_ID = "C02NM2H9J5Q" |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| client = slack_sdk.WebClient(token=keys.OAUTH_TOKEN) |  | ||||||
|  |  | ||||||
| message_models.set_db(SqliteDatabase(MESSAGES_DB)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def message_dict_to_model(message): |  | ||||||
|     if message["type"] == "message": |  | ||||||
|         thread_ts = message["thread_ts"] if "thread_ts" in message else message["ts"] |  | ||||||
|         uid = message.get("user", "BAD USER") |  | ||||||
|         user, _ = message_models.User.get_or_create(user_id = uid) |  | ||||||
|         thread, _ = message_models.Thread.get_or_create(thread_ts = thread_ts) |  | ||||||
|         m, new = message_models.Message.get_or_create( |  | ||||||
|             user = user, |  | ||||||
|             thread = thread, |  | ||||||
|             ts = message["ts"], |  | ||||||
|             channel_id = ARCHIVE_ID, |  | ||||||
|             text = message["text"] |  | ||||||
|         ) |  | ||||||
|         print("Saved (text) {} (new={})".format(m, new)) |  | ||||||
|  |  | ||||||
|         for f in message.get("files", []): #default: [] |  | ||||||
|             m.file_type = f["filetype"] |  | ||||||
|             m.perma_link = f["url_private_download"] |  | ||||||
|             m.save() |  | ||||||
|             print("Saved permalink {} to {} (possibly overwriting)".format(f["name"], m)) |  | ||||||
|         if new: |  | ||||||
|             return m |  | ||||||
|         else: |  | ||||||
|             return None |  | ||||||
|     else: |  | ||||||
|         print("What should I do of {}".format(message)) |  | ||||||
|         return None |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def check_all_past_messages(): |  | ||||||
|     last_ts = 0 |  | ||||||
|      |  | ||||||
|     result = client.conversations_history( |  | ||||||
|         channel=ARCHIVE_ID, |  | ||||||
|         oldest=last_ts |  | ||||||
|     ) |  | ||||||
|  |  | ||||||
|     new_messages = result.get("messages", []) # fetches 100 messages by default |  | ||||||
|  |  | ||||||
|     new_fetches = [] |  | ||||||
|     for m in new_messages: |  | ||||||
|         new_fetches.append(message_dict_to_model(m)) |  | ||||||
|     # print(result) |  | ||||||
|     refetch = result.get("has_more", False) |  | ||||||
|     print(f"Refetching : {refetch}") |  | ||||||
|     while refetch: # we have not actually fetched them all |  | ||||||
|         try: |  | ||||||
|             result = client.conversations_history( |  | ||||||
|                 channel = ARCHIVE_ID, |  | ||||||
|                 cursor = result["response_metadata"]["next_cursor"], |  | ||||||
|                 oldest = last_ts |  | ||||||
|             ) # refetches in batches of 100 messages |  | ||||||
|             refetch = result.get("has_more", False) |  | ||||||
|             new_messages = result.get("messages", []) |  | ||||||
|             for m in new_messages: |  | ||||||
|                 new_fetches.append(message_dict_to_model(m)) |  | ||||||
|         except SlackApiError: # Most likely a rate-limit |  | ||||||
|             print("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(30)) |  | ||||||
|             time.sleep(30) |  | ||||||
|             refetch = True |  | ||||||
|  |  | ||||||
|  |  | ||||||
| check_all_past_messages() |  | ||||||
| @@ -1,38 +0,0 @@ | |||||||
| from peewee import SqliteDatabase |  | ||||||
|  |  | ||||||
| from persistence import article_models, message_models |  | ||||||
|  |  | ||||||
| # Global logger setup: |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # Constant values... |  | ||||||
| DOWNLOADS_DB = "../container_data/downloads.db" |  | ||||||
| MESSAGES_DB = "../container_data/messages.db" |  | ||||||
|  |  | ||||||
| BOT_ID = "U02MR1R8UJH" |  | ||||||
| ARCHIVE_ID = "C02MM7YG1V4" |  | ||||||
| DEBUG_ID = "C02NM2H9J5Q" |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # DB Setup: |  | ||||||
| article_models.set_db(SqliteDatabase( |  | ||||||
|     DOWNLOADS_DB, |  | ||||||
|     pragmas = {'journal_mode': 'wal'} # mutliple threads can access at once |  | ||||||
| )) |  | ||||||
|  |  | ||||||
| message_models.set_db(SqliteDatabase(MESSAGES_DB)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| for reaction in message_models.Reaction.select(): |  | ||||||
|     print(reaction)         |  | ||||||
|     thread = reaction.message.thread |  | ||||||
|     articles = message_models.get_referenced_articles(thread, article_models.ArticleDownload) |  | ||||||
|     for a in articles: |  | ||||||
|         print(a) |  | ||||||
|     reaction = reaction.type |  | ||||||
|     status = 1 if reaction == "white_check_mark" else -1 |  | ||||||
|     print(status) |  | ||||||
|     for article in articles: |  | ||||||
|         article.verified = status |  | ||||||
|         article.save() |  | ||||||
| @@ -1,151 +0,0 @@ | |||||||
| [ |  | ||||||
|     "https://www.swissinfo.ch/ger/wirtschaft/koennen-ki-und-direkte-demokratie-nebeneinander-bestehen-/47542048", |  | ||||||
|     "https://www.zeit.de/2011/33/CH-Oekonophysik", |  | ||||||
|     "https://ourworld.unu.edu/en/green-idea-self-organizing-traffic-signals", |  | ||||||
|     "https://www.youtube.com/watch?v=-FQD4ie9UYA", |  | ||||||
|     "https://www.brandeins.de/corporate-services/mck-wissen/mck-wissen-logistik/schwaermen-fuer-das-optimum", |  | ||||||
|     "https://www.youtube.com/watch?v=upQM4Xzh8zM", |  | ||||||
|     "https://www.youtube.com/watch?v=gAkoprZmW4k", |  | ||||||
|     "https://www.youtube.com/watch?v=VMzfDVAWXHI&t=1s", |  | ||||||
|     "https://www.youtube.com/watch?v=1SwTiIlkndE", |  | ||||||
|     "https://www.informatik-aktuell.de/management-und-recht/digitalisierung/digitale-revolution-und-oekonomie-40-quo-vadis.html", |  | ||||||
|     "https://www.youtube.com/watch?v=cSvvH0SBFOw", |  | ||||||
|     "https://www.linkedin.com/posts/margit-osterloh-24198a104_pl%C3%A4doyer-gegen-sprechverbote-ugcPost-6925702100450480129-K7Dl?utm_source=linkedin_share&utm_medium=member_desktop_web", |  | ||||||
|     "https://www.nebelspalter.ch/plaedoyer-gegen-sprechverbote", |  | ||||||
|     "https://falling-walls.com/people/dirk-helbing/", |  | ||||||
|     "https://digitalsensemaker.podigee.io/3-2-mit-dirk-helbing", |  | ||||||
|     "https://www.blick.ch/wirtschaft/musk-als-hueter-der-redefreiheit-eth-experte-sagt-musks-vorhaben-hat-potenzial-aber-id17437811.html", |  | ||||||
|     "https://www.trend.at/standpunkte/mit-verantwortung-zukunft-10082300", |  | ||||||
|     "https://www.pantarhei.ch/podcast/", |  | ||||||
|     "https://ethz.ch/en/industry/industry/news/data/2022/04/intelligent-traffic-lights-for-optimal-traffic-flow.html", |  | ||||||
|     "https://ethz.ch/de/wirtschaft/industry/news/data/2022/04/optimaler-verkehrsfluss-mit-intelligenten-ampeln.html", |  | ||||||
|     "https://www.spektrum.de/news/die-verschlungenen-wege-der-menschen/1181815", |  | ||||||
|     "https://www.pcwelt.de/a/diktatur-4-0-schoene-neue-digitalisierte-welt,3447005", |  | ||||||
|     "https://www.nzz.ch/english/cancel-culture-at-eth-a-professor-receives-death-threats-over-a-lecture-slide-ld.1675322", |  | ||||||
|     "https://www.brandeins.de/corporate-services/mck-wissen/mck-wissen-logistik/schwaermen-fuer-das-optimum", |  | ||||||
|     "https://www.achgut.com/artikel/ausgestossene_der_woche_prinz_william_als_immaginierter_rassist", |  | ||||||
|     "https://www.pinterpolitik.com/in-depth/klaim-big-data-luhut-perlu-diuji/", |  | ||||||
|     "https://www.srf.ch/kultur/gesellschaft-religion/eklat-an-der-eth-wenn-ein-angeblicher-schweinevergleich-zur-staatsaffaere-wird", |  | ||||||
|     "https://open.spotify.com/episode/6s1icdoplZeNOINvx6ZHTd?si=610a699eba004da2&nd=1", |  | ||||||
|     "https://www.nzz.ch/schweiz/shitstorm-an-der-eth-ein-professor-erhaelt-morddrohungen-ld.1673554", |  | ||||||
|     "https://www.nzz.ch/schweiz/shitstorm-an-der-eth-ein-professor-erhaelt-morddrohungen-ld.1673554", |  | ||||||
|     "https://djmag.com/features/after-astroworld-what-being-done-stop-crowd-crushes-happening-again", |  | ||||||
|     "https://prisma-hsg.ch/articles/meine-daten-deine-daten-unsere-daten/", |  | ||||||
|     "https://www.srf.ch/audio/focus/zukunftsforscher-dirk-helbing-die-welt-ist-keine-maschine?id=10756661", |  | ||||||
|     "https://www.20min.ch/story/roboter-fuer-hunde-machen-wenig-sinn-647302764916", |  | ||||||
|     "https://www.wienerzeitung.at/nachrichten/wissen/mensch/942890-Roboter-als-Praesidentschaftskandidaten.html", |  | ||||||
|     "https://disruptors.fm/11-building-a-crystal-ball-of-the-world-unseating-capitalism-and-creating-a-new-world-order-with-prof-dirk-helbing/", |  | ||||||
|     "https://www.spreaker.com/user/disruptorsfm/11-building-crystal-ball-of-the-world-un", |  | ||||||
|     "https://www.youtube.com/watch?v=fRkCMC3zqSQ", |  | ||||||
|     "https://arstechnica.com/science/2021/11/what-the-physics-of-crowds-can-tell-us-about-the-tragic-deaths-at-astroworld/", |  | ||||||
|     "https://www.fox23.com/news/trending/astroworld-festival-big-crowds-can-flow-like-liquid-with-terrifying-results/37QH6Q4RGFELHGCZSZTBV46STU/", |  | ||||||
|     "https://futurism.com/astroworld-theory-deaths-bodies-fluid", |  | ||||||
|     "https://www.businessinsider.com/why-people-died-astroworld-crowd-crush-physics-fluid-dynamics-2021-11", |  | ||||||
|     "https://theconversation.com/ten-tips-for-surviving-a-crowd-crush-112169", |  | ||||||
|     "https://www.limmattalerzeitung.ch/basel/das-wort-zum-tag-kopie-von-4-januar-hypotenuse-schlaegt-kathete-trivia-trampel-pandemie-ld.2233931", |  | ||||||
|     "https://magazine.swissinformatics.org/en/whats-wrong-with-ai/", |  | ||||||
|     "https://magazine.swissinformatics.org/en/whats-wrong-with-ai/", |  | ||||||
|     "https://www.netkwesties.nl/1541/wrr-ai-wordt-de-verbrandingsmotor-van.htm", |  | ||||||
|     "https://youtu.be/ptm9zLG2KaE", |  | ||||||
|     "https://www.deutschlandfunkkultur.de/die-zukunft-der-demokratie-mehr-teilhabe-von-unten-wagen.976.de.html?dram:article_id=468341", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://technikjournal.de/2017/08/02/ein-plaedoyer-fuer-die-digitale-demokratie/", |  | ||||||
|     "https://technikjournal.de/2017/08/02/ein-plaedoyer-fuer-die-digitale-demokratie/", |  | ||||||
|     "https://trafo.hypotheses.org/23989", |  | ||||||
|     "https://web.archive.org/web/20200609053329/https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/the-corona-crisis-reveals-the-struggle-for-a-sustainable-digital-future/", |  | ||||||
|     "https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/", |  | ||||||
|     "https://www.youtube.com/watch?v=gAkoprZmW4k", |  | ||||||
|     "https://www.rhein-zeitung.de/region/aus-den-lokalredaktionen/nahe-zeitung_artikel,-peter-flaschels-lebenswerk-hat-die-sozialgeschichte-beeinflusst-_arid,2322161.html", |  | ||||||
|     "https://www.blick.ch/wirtschaft/online-boom-ohne-ende-corona-befeuert-die-tech-revolution-id16359910.html", |  | ||||||
|     "https://www.nzz.ch/meinung/china-unterwirft-tech-und-social-media-das-geht-auch-europa-an-ld.1643010", |  | ||||||
|     "https://www.say.media/article/la-mort-par-algorithme", |  | ||||||
|     "https://www.suedostschweiz.ch/aus-dem-leben/2021-08-14/stau-ist-nicht-gleich-stau", |  | ||||||
|     "https://www.swissinfo.ch/eng/directdemocracy/political-perspectives_digital-democracy--too-risky--or-the-chance-of-a-generation-/43836222", |  | ||||||
|     "https://kow-berlin.com/exhibitions/illusion-einer-menschenmenge", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://www.politik-kommunikation.de/ressorts/artikel/eine-gefaehrliche-machtasymmetrie-1383558602", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://solutions.hamburg/ethik-und-digitalisierung-nicht-voneinander-getrennt-betrachten/", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://avenue.argusdatainsights.ch/Article/AvenueClip?artikelHash=d14d91ec9a8b4cb0b6bb3012c0cefd8b_27F0B19422F1F03723769C18906AA1EE&artikelDateiId=298862327", |  | ||||||
|     "https://www.tagblatt.ch/kultur/grosses-ranking-ihre-stimme-hat-gewicht-das-sind-die-50-profiliertesten-intellektuellen-der-schweiz-ld.2182261", |  | ||||||
|     "https://reliefweb.int/report/world/building-multisystemic-understanding-societal-resilience-covid-19-pandemic", |  | ||||||
|     "https://reliefweb.int/report/world/building-multisystemic-understanding-societal-resilience-covid-19-pandemic", |  | ||||||
|     "https://www.events.at/e/wie-wir-in-zukunft-leben-wollen-die-stadt-als-datenfeld", |  | ||||||
|     "https://www.events.at/e/wie-wir-in-zukunft-leben-wollen-die-stadt-als-datenfeld", |  | ||||||
|     "https://greennetproject.org/en/2018/11/27/prof-dirk-helbing-es-braucht-vor-allem-tolle-ideen-in-die-sich-die-leute-verlieben/", |  | ||||||
|     "https://www.hpcwire.com/2011/05/06/simulating_society_at_the_global_scale/", |  | ||||||
|     "https://www.technologyreview.com/2010/04/30/204005/europes-plan-to-simulate-the-entire-planet/", |  | ||||||
|     "https://komentare.sme.sk/c/22543617/smrt-podla-algoritmu.html", |  | ||||||
|     "https://komentare.sme.sk/c/22543617/smrt-podla-algoritmu.html", |  | ||||||
|     "https://www.confidencial.com.ni/opinion/muerte-por-algoritmo/", |  | ||||||
|     "https://www.nzz.ch/panorama/wie-kann-eine-massenpanik-verhindert-werden-ld.1614761", |  | ||||||
|     "https://www.20min.ch/story/roboter-fuer-hunde-machen-wenig-sinn-647302764916", |  | ||||||
|     "https://www.wienerzeitung.at/nachrichten/wissen/mensch/942890-Roboter-als-Praesidentschaftskandidaten.html", |  | ||||||
|     "https://www.srf.ch/audio/focus/zukunftsforscher-dirk-helbing-die-welt-ist-keine-maschine?id=10756661", |  | ||||||
|     "https://disruptors.fm/11-building-a-crystal-ball-of-the-world-unseating-capitalism-and-creating-a-new-world-order-with-prof-dirk-helbing/", |  | ||||||
|     "https://www.spreaker.com/user/disruptorsfm/11-building-crystal-ball-of-the-world-un", |  | ||||||
|     "https://www.youtube.com/watch?v=fRkCMC3zqSQ", |  | ||||||
|     "https://arstechnica.com/science/2021/11/what-the-physics-of-crowds-can-tell-us-about-the-tragic-deaths-at-astroworld/", |  | ||||||
|     "https://www.fox23.com/news/trending/astroworld-festival-big-crowds-can-flow-like-liquid-with-terrifying-results/37QH6Q4RGFELHGCZSZTBV46STU/", |  | ||||||
|     "https://futurism.com/astroworld-theory-deaths-bodies-fluid", |  | ||||||
|     "https://www.businessinsider.com/why-people-died-astroworld-crowd-crush-physics-fluid-dynamics-2021-11", |  | ||||||
|     "https://theconversation.com/ten-tips-for-surviving-a-crowd-crush-112169", |  | ||||||
|     "https://www.limmattalerzeitung.ch/basel/das-wort-zum-tag-kopie-von-4-januar-hypotenuse-schlaegt-kathete-trivia-trampel-pandemie-ld.2233931", |  | ||||||
|     "https://www.pantarhei.ch/podcast/", |  | ||||||
|     "https://www.focus.it/scienza/scienze/folla-fisica-modelli-simulazioni", |  | ||||||
|     "https://www.focus.it/scienza/scienze/folla-fisica-modelli-simulazioni", |  | ||||||
|     "https://www.netkwesties.nl/1541/wrr-ai-wordt-de-verbrandingsmotor-van.htm", |  | ||||||
|     "https://www.transformationbeats.com/de/transformation/digitale-gesellschaft/", |  | ||||||
|     "https://www.transformationbeats.com/de/transformation/digitale-gesellschaft/", |  | ||||||
|     "https://www.suedkurier.de/ueberregional/wirtschaft/Wie-uns-der-Staat-heimlich-erzieht-sogar-auf-dem-Klo;art416,8763904", |  | ||||||
|     "https://www.suedkurier.de/ueberregional/wirtschaft/Wie-uns-der-Staat-heimlich-erzieht-sogar-auf-dem-Klo;art416,8763904", |  | ||||||
|     "https://www.deutschlandfunkkultur.de/die-zukunft-der-demokratie-mehr-teilhabe-von-unten-wagen.976.de.html?dram:article_id=468341", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://trafo.hypotheses.org/23989", |  | ||||||
|     "https://web.archive.org/web/20200609053329/https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/the-corona-crisis-reveals-the-struggle-for-a-sustainable-digital-future/", |  | ||||||
|     "https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/", |  | ||||||
|     "https://www.youtube.com/watch?v=gAkoprZmW4k", |  | ||||||
|     "https://futurium.de/de/gespraech/ranga-yogeshwar-1/ranga-yogeshwar-dirk-helbing-mit-musik-von-till-broenner", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://idw-online.de/en/news113518", |  | ||||||
|     "https://blmplus.de/die-digitalcharta-ist-erst-der-anfang-ein-szenario-von-dirk-helbing/", |  | ||||||
|     "https://www.risiko-dialog.ch/big-nudging-vom-computer-gelenkt-aber-wohin/", |  | ||||||
|     "https://idw-online.de/de/news13986", |  | ||||||
|     "https://www.uni-stuttgart.de/presse/archiv/uni-kurier/uk84_85/forschung/fw66.html", |  | ||||||
|     "https://www.infosperber.ch/medien/trends/rankings-oft-unbrauchbar-so-oder-so-aber-immer-schadlich/", |  | ||||||
|     "https://www.infosperber.ch/medien/trends/rankings-oft-unbrauchbar-so-oder-so-aber-immer-schadlich/", |  | ||||||
|     "https://www.nzz.ch/meinung/china-unterwirft-tech-und-social-media-das-geht-auch-europa-an-ld.1643010", |  | ||||||
|     "https://www.suedostschweiz.ch/aus-dem-leben/2021-08-14/stau-ist-nicht-gleich-stau", |  | ||||||
|     "https://www.swissinfo.ch/eng/directdemocracy/political-perspectives_digital-democracy--too-risky--or-the-chance-of-a-generation-/43836222", |  | ||||||
|     "https://werteundwandel.de/inhalte/d2030-in-aufbruchstimmung-fuer-eine-lebenswerte-zukunft/", |  | ||||||
|     "https://www.springer.com/gp/book/9783642240034", |  | ||||||
|     "https://www.springer.com/de/book/9783319908687", |  | ||||||
|     "https://www.youtube.com/watch?v=n9e77iYZPEY", |  | ||||||
|     "https://greennetproject.org/en/2018/11/27/prof-dirk-helbing-es-braucht-vor-allem-tolle-ideen-in-die-sich-die-leute-verlieben/", |  | ||||||
|     "https://www.hpcwire.com/2011/05/06/simulating_society_at_the_global_scale/", |  | ||||||
|     "https://www.say.media/article/la-mort-par-algorithme", |  | ||||||
|     "https://www.confidencial.com.ni/opinion/muerte-por-algoritmo/", |  | ||||||
|     "https://www.nzz.ch/panorama/wie-kann-eine-massenpanik-verhindert-werden-ld.1614761", |  | ||||||
|     "https://www.nesta.org.uk/report/digital-democracy-the-tools-transforming-political-engagement/", |  | ||||||
|     "https://www.nature.com/articles/news.2010.351", |  | ||||||
|     "https://www.focus.de/panorama/welt/tid-19265/gastkommentar-nutzt-die-moeglichkeiten-des-computers_aid_534372.html", |  | ||||||
|     "https://www.theglobalist.com/democracy-technology-innovation-society-internet/", |  | ||||||
|     "https://www.theglobalist.com/capitalism-democracy-technology-surveillance-privacy/", |  | ||||||
|     "https://www.theglobalist.com/google-artificial-intelligence-big-data-technology-future/", |  | ||||||
|     "https://www.theglobalist.com/fascism-big-data-artificial-intelligence-surveillance-democracy/", |  | ||||||
|     "https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/", |  | ||||||
|     "https://www.theglobalist.com/technology-society-sustainability-future-humanity/", |  | ||||||
|     "https://www.theglobalist.com/society-technology-peace-sustainability/", |  | ||||||
|     "https://www.theglobalist.com/democracy-technology-social-media-artificial-intelligence/", |  | ||||||
|     "https://www.theglobalist.com/financial-system-reform-economy-internet-of-things-capitalism/", |  | ||||||
|     "https://www.theglobalist.com/capitalism-society-equality-sustainability-crowd-funding/", |  | ||||||
|     "https://www.theglobalist.com/united-nations-world-government-peace-sustainability-society/", |  | ||||||
|     "https://www.theglobalist.com/world-economy-sustainability-environment-society/" |  | ||||||
| ] |  | ||||||
| @@ -1,61 +0,0 @@ | |||||||
| import youtube_dl |  | ||||||
| from waybackpy import WaybackMachineSaveAPI # upload to archive.org |  | ||||||
| import time |  | ||||||
|  |  | ||||||
|  |  | ||||||
| urls = [ |  | ||||||
| "https://id2020.org", |  | ||||||
| "https://www.weforum.org/platforms/the-centre-for-cybersecurity", |  | ||||||
| "https://www.unhcr.org/blogs/wp-content/uploads/sites/48/2018/04/fs.pdf", |  | ||||||
| "https://en.wikipedia.org/wiki/Social_Credit_System", |  | ||||||
| "https://en.wikipedia.org/wiki/Customer_lifetime_value", |  | ||||||
| "https://www.weforum.org/reports/the-internet-of-bodies-is-here-tackling-new-challenges-of-technology-governance", |  | ||||||
| "https://www.un.org/en/about-us/universal-declaration-of-human-rights", |  | ||||||
| "https://www.biometricupdate.com/201909/id2020-and-partners-launch-program-to-provide-digital-id-with-vaccines", |  | ||||||
| "https://www.wired.com/2008/06/pb-theory/", |  | ||||||
| "https://www.medtechdive.com/news/fda-warns-of-false-positives-with-bd-coronavirus-diagnostic/581115/", |  | ||||||
| "https://www.bbc.com/news/world-middle-east-52579475", |  | ||||||
| "https://www.timesofisrael.com/over-12000-mistakenly-quarantined-by-phone-tracking-health-ministry-admits/", |  | ||||||
| "https://www.delftdesignforvalues.nl", |  | ||||||
| "https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/", |  | ||||||
| "https://link.springer.com/chapter/10.1007/978-3-319-90869-4_17", |  | ||||||
| "https://www.youtube.com/watch?v=_KhAsJRk2lo", |  | ||||||
| "https://www.bloomberg.org/environment/supporting-sustainable-cities/american-cities-climate-challenge/", |  | ||||||
| "https://climatecitycup.org", |  | ||||||
|  |  | ||||||
| ] |  | ||||||
|  |  | ||||||
| def post_download_hook(ret_code): |  | ||||||
|     # print(ret_code) |  | ||||||
|     if ret_code['status'] == 'finished': |  | ||||||
|         file_loc = ret_code["filename"] |  | ||||||
|         print(file_loc) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def save_video(url): |  | ||||||
|     """Saves video according to url and save path""" |  | ||||||
|     ydl_opts = { |  | ||||||
|         'format': 'best[height<=720]', |  | ||||||
|         # 'outtmpl': f"{file_path}.%(ext)s", # basically the filename from the object, but with a custom extension depending on the download |  | ||||||
|         'progress_hooks': [post_download_hook], |  | ||||||
|         'updatetime': False |  | ||||||
|     } |  | ||||||
|     try: |  | ||||||
|         with youtube_dl.YoutubeDL(ydl_opts) as ydl: |  | ||||||
|             ydl.download([url]) |  | ||||||
|             # the downloaded file name is printed by post_download_hook |  | ||||||
|     except Exception as e: |  | ||||||
|         print(f"Youtube download crashed: {e}") |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # for i, url in enumerate(urls): |  | ||||||
| #     print(f"Downloading video {i+1} / {len(urls)}") |  | ||||||
|     # save_video(url) |  | ||||||
|  |  | ||||||
| for i, url in enumerate(urls): |  | ||||||
|     print(f"Saving url {i+1} / {len(urls)}") |  | ||||||
|     user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" # needed? |  | ||||||
|     wayback = WaybackMachineSaveAPI(url, user_agent) |  | ||||||
|     archive_url = wayback.save() |  | ||||||
|     print(archive_url) |  | ||||||
|     time.sleep(20) |  | ||||||
| @@ -2,10 +2,10 @@ FROM python:latest | |||||||
|  |  | ||||||
| ENV TZ Europe/Zurich | ENV TZ Europe/Zurich | ||||||
|  |  | ||||||
| RUN mkdir -p /app/auto_news | RUN mkdir -p /app/news_fetch | ||||||
|  |  | ||||||
| COPY requirements.txt /app/requirements.txt | COPY requirements.txt /app/requirements.txt | ||||||
| RUN python3 -m pip install -r /app/requirements.txt | RUN python3 -m pip install -r /app/requirements.txt | ||||||
|  |  | ||||||
| COPY . /app/auto_news | COPY . /app/news_fetch | ||||||
| WORKDIR /app/auto_news | WORKDIR /app/news_fetch | ||||||
|   | |||||||
| @@ -126,13 +126,12 @@ class Dispatcher(Thread): | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     # def manual_processing(self, articles, workers): |  | ||||||
|     #     for w in workers: |  | ||||||
|     #         w.start() |  | ||||||
|  |  | ||||||
|     #     for article in articles: | class PrintWorker: | ||||||
|     #         notifier = lambda article: logger.info(f"Completed manual actions for {article}") |     def send(self, article): | ||||||
|     #         ArticleWatcher(article, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg  |         print(f"Uploaded article {article}") | ||||||
|  |     def keep_alive(self): # keeps script running, because there is nothing else in the main thread | ||||||
|  |         while True: sleep(1) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -140,11 +139,6 @@ if __name__ == "__main__": | |||||||
|     dispatcher = Dispatcher() |     dispatcher = Dispatcher() | ||||||
|  |  | ||||||
|     if "upload" in sys.argv: |     if "upload" in sys.argv: | ||||||
|         class PrintWorker: |  | ||||||
|             def send(self, article): |  | ||||||
|                 print(f"Uploaded article {article}") |  | ||||||
|             def keep_alive(self): # keeps script running, because there is nothing else in the main thread |  | ||||||
|                 while True: sleep(1) |  | ||||||
|  |  | ||||||
|         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "" or models.ArticleDownload.archive_url == "TODO:UPLOAD").execute() |         articles = models.ArticleDownload.select().where(models.ArticleDownload.archive_url == "" or models.ArticleDownload.archive_url == "TODO:UPLOAD").execute() | ||||||
|         logger.info(f"Launching upload to archive for {len(articles)} articles.") |         logger.info(f"Launching upload to archive for {len(articles)} articles.") | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user