update nas target, documentation
This commit is contained in:
parent
6c08dec20a
commit
e6bfe811d0
8
config/README.md
Normal file
8
config/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
## Configuration: example
|
||||
The files inside this directory (not the ones in `env/`) are a sample of the required configuration.
|
||||
|
||||
Please create a copy of these files under `<location of downloads>/config/...`.
|
||||
|
||||
> Note:
|
||||
>
|
||||
> Some of the fields are blank, please fill them in as needed.
|
0
env/debug → config/env/debug
vendored
0
env/debug → config/env/debug
vendored
0
env/production → config/env/production
vendored
0
env/production → config/env/production
vendored
@ -25,7 +25,7 @@ db_printout: /app/containerdata/backups
|
||||
local_storage_path: /app/containerdata/files
|
||||
debug_storage_path: /app/containerdata/debug/
|
||||
default_download_path: /app/containerdata/tmp
|
||||
remote_storage_path: /helbing_support/Files RM/Archiving
|
||||
remote_storage_path: /helbing_support/Archiving-Pipeline
|
||||
browser_profile_path: /app/containerdata/dependencies/news_fetch.profile
|
||||
# please keep this exact name
|
||||
browser_print_delay: 3
|
@ -28,7 +28,7 @@ services:
|
||||
- ${CONTAINER_DATA}/config/nas_sync.config:/sync/nas_sync.config
|
||||
- ${CONTAINER_DATA}/config/nas_login.config:/sync/nas_login.config
|
||||
command:
|
||||
- nas22.ethz.ch/gess_coss_1/helbing_support/Files RM/Archiving/TEST # first command is the target mount path
|
||||
- nas22.ethz.ch/gess_coss_1/helbing_support/Archiving-Pipeline # first command is the target mount path
|
||||
- lsyncd
|
||||
- /sync/nas_sync.config
|
||||
|
||||
|
7
manual/README.md
Normal file
7
manual/README.md
Normal file
@ -0,0 +1,7 @@
|
||||
### MANUAL TASKS
|
||||
|
||||
The files inside this directory contain scripts for repetitive but somewhat automatable tasks.
|
||||
|
||||
> ⚠️ warning:
|
||||
>
|
||||
> Most scripts still require manual intervention before/after running and probably require changes to the code. **Please make sure you understand them before using them!**
|
21
manual/batch_archive.py
Normal file
21
manual/batch_archive.py
Normal file
@ -0,0 +1,21 @@
|
||||
"""
|
||||
Saves websites specified in 'batch_urls.txt' to the wayback machine. Outputs archive urls to terminal
|
||||
Hint: use 'python batch_archive.py > batch_archive.txt' to save the output to a file
|
||||
"""
|
||||
from waybackpy import WaybackMachineSaveAPI # upload to archive.org
|
||||
import time
|
||||
|
||||
# Browser-like user agent sent with every save request. It never changes, so it
# is defined once instead of on every loop iteration.
# NOTE(review): the original author was unsure whether a custom agent is needed.
user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" # needed?

with open("batch_urls.txt", "r") as f:
    # readlines() would keep the trailing "\n" on every entry and hand blank
    # lines to the archive API, so strip each line and drop the empty ones.
    urls = [line.strip() for line in f if line.strip()]

for i, url in enumerate(urls):
    print(f"Saving url {i+1} / {len(urls)}")
    wayback = WaybackMachineSaveAPI(url, user_agent)
    archive_url = wayback.save()
    print(archive_url)
    # Uploads to archive.org are rate limited
    time.sleep(20)
|
18
manual/batch_urls.txt
Normal file
18
manual/batch_urls.txt
Normal file
@ -0,0 +1,18 @@
|
||||
https://id2020.org
|
||||
https://www.weforum.org/platforms/the-centre-for-cybersecurity
|
||||
https://www.unhcr.org/blogs/wp-content/uploads/sites/48/2018/04/fs.pdf
|
||||
https://en.wikipedia.org/wiki/Social_Credit_System
|
||||
https://en.wikipedia.org/wiki/Customer_lifetime_value
|
||||
https://www.weforum.org/reports/the-internet-of-bodies-is-here-tackling-new-challenges-of-technology-governance
|
||||
https://www.un.org/en/about-us/universal-declaration-of-human-rights
|
||||
https://www.biometricupdate.com/201909/id2020-and-partners-launch-program-to-provide-digital-id-with-vaccines
|
||||
https://www.wired.com/2008/06/pb-theory/
|
||||
https://www.medtechdive.com/news/fda-warns-of-false-positives-with-bd-coronavirus-diagnostic/581115/
|
||||
https://www.bbc.com/news/world-middle-east-52579475
|
||||
https://www.timesofisrael.com/over-12000-mistakenly-quarantined-by-phone-tracking-health-ministry-admits/
|
||||
https://www.delftdesignforvalues.nl
|
||||
https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/
|
||||
https://link.springer.com/chapter/10.1007/978-3-319-90869-4_17
|
||||
https://www.youtube.com/watch?v=_KhAsJRk2lo
|
||||
https://www.bloomberg.org/environment/supporting-sustainable-cities/american-cities-climate-challenge/
|
||||
https://climatecitycup.org
|
33
manual/batch_youtube.py
Normal file
33
manual/batch_youtube.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""
|
||||
Saves youtube videos specified in 'batch_urls.txt' to the local folder. (to be copied manually)
|
||||
"""
|
||||
import youtube_dl
|
||||
|
||||
with open("batch_urls.txt", "r") as f:
    # readlines() would leave the trailing "\n" on each URL and keep blank
    # lines; strip every entry and skip the empty ones before downloading.
    urls = [line.strip() for line in f if line.strip()]
|
||||
|
||||
|
||||
def post_download_hook(ret_code):
    """Print the path of a downloaded file once its download has finished.

    ret_code is the status dict handed to the progress hook; only its
    'status' and 'filename' keys are read. Any status other than 'finished'
    is ignored.
    """
    if ret_code['status'] != 'finished':
        return
    print(ret_code["filename"])
|
||||
|
||||
|
||||
def save_video(url):
    """Download the video behind url with youtube_dl.

    The format selector 'best[height<=720]' is requested and
    post_download_hook is registered as progress hook. Any exception raised
    during the download is caught and printed, so this function never
    propagates a download error to the caller.
    """
    options = dict(
        format='best[height<=720]',
        progress_hooks=[post_download_hook],
        updatetime=False,
    )
    try:
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except Exception as e:
        print(f"Youtube download crashed: {e}")
|
||||
|
||||
|
||||
# Download every listed url in order, announcing 1-based progress first.
for position, url in enumerate(urls, start=1):
    print(f"Downloading video {position} / {len(urls)}")
    save_video(url)
|
@ -1,3 +1,6 @@
|
||||
"""
|
||||
Extracts all urls from a list of mails exported from thunderbird. Writes to 'mails_url_export.json'
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
@ -19,5 +22,5 @@ for f in all_files:
|
||||
|
||||
print("Saved {} urls".format(len(all_urls)))
|
||||
|
||||
with open("media_mails_export.json", "w") as f:
|
||||
with open("mails_url_export.json", "w") as f:
|
||||
json.dump(all_urls, f)
|
@ -1,5 +1,8 @@
|
||||
"""
|
||||
Runs the news_fetch pipeline against a manually curated list of urls and saves them locally
|
||||
"""
|
||||
import sys
|
||||
sys.path.append("../app")
|
||||
sys.path.append("../app/news_fetch")
|
||||
import runner
|
||||
import logging
|
||||
logger = logging.getLogger()
|
||||
@ -11,24 +14,18 @@ console = Console()
|
||||
|
||||
logger.info("Overwriting production values for single time media-fetch")
|
||||
runner.configuration.models.set_db(
|
||||
runner.configuration.SqliteDatabase("../.dev/media_message_dummy.db"), # chat_db (not needed here)
|
||||
runner.configuration.SqliteDatabase("../.dev/media_downloads.db")
|
||||
)
|
||||
runner.configuration.main_config["DOWNLOADS"]["local_storage_path"] = "../.dev/"
|
||||
|
||||
|
||||
def fetch():
|
||||
coordinator = runner.Coordinator()
|
||||
dispatcher = runner.Dispatcher()
|
||||
|
||||
dispatcher.workers_in = [{"FetchWorker": runner.FetchWorker(), "DownloadWorker": runner.DownloadWorker()}]
|
||||
dispatcher.workers_out = [{"PrintWorker": runner.PrintWorker()}]
|
||||
|
||||
kwargs = {
|
||||
"worker_download" : runner.DownloadWorker(),
|
||||
"worker_fetch" : runner.FetchWorker(),
|
||||
"worker_upload" : runner.UploadWorker(),
|
||||
}
|
||||
|
||||
coordinator.add_workers(**kwargs)
|
||||
coordinator.start()
|
||||
dispatcher.start()
|
||||
|
||||
with open("media_urls.json", "r") as f:
|
||||
url_list = json.loads(f.read())
|
||||
@ -36,9 +33,8 @@ def fetch():
|
||||
logger.info(f"Found {len(url_list)} media urls")
|
||||
for u in url_list:
|
||||
msg_text = f"<{u}|dummy preview text>"
|
||||
dummy_thread = runner.models.Thread()
|
||||
msg = runner.models.Message(text= msg_text, thread=dummy_thread)
|
||||
coordinator.incoming_request(msg)
|
||||
dispatcher.incoming_request(msg)
|
||||
|
||||
|
||||
|
||||
def show():
|
@ -1,88 +0,0 @@
|
||||
import time
|
||||
import keys
|
||||
import slack_sdk
|
||||
from slack_sdk.errors import SlackApiError
|
||||
from peewee import SqliteDatabase
|
||||
|
||||
from persistence import message_models
|
||||
# from bot_utils import messages
|
||||
|
||||
|
||||
|
||||
# Constant values...
|
||||
MESSAGES_DB = "/app/containerdata/messages.db"
|
||||
|
||||
BOT_ID = "U02MR1R8UJH"
|
||||
ARCHIVE_ID = "C02MM7YG1V4"
|
||||
DEBUG_ID = "C02NM2H9J5Q"
|
||||
|
||||
|
||||
|
||||
client = slack_sdk.WebClient(token=keys.OAUTH_TOKEN)
|
||||
|
||||
message_models.set_db(SqliteDatabase(MESSAGES_DB))
|
||||
|
||||
|
||||
def message_dict_to_model(message):
    """Store a Slack message dict in the message database.

    Returns the Message row when it was newly created, otherwise None
    (also None for dicts whose "type" is not "message").
    """
    # Guard clause: anything that is not a plain message is only reported.
    if message["type"] != "message":
        print("What should I do of {}".format(message))
        return None

    # Messages without an explicit thread use their own timestamp as thread id.
    if "thread_ts" in message:
        thread_ts = message["thread_ts"]
    else:
        thread_ts = message["ts"]

    sender_id = message.get("user", "BAD USER")
    user, _ = message_models.User.get_or_create(user_id=sender_id)
    thread, _ = message_models.Thread.get_or_create(thread_ts=thread_ts)
    m, new = message_models.Message.get_or_create(
        user=user,
        thread=thread,
        ts=message["ts"],
        channel_id=ARCHIVE_ID,
        text=message["text"],
    )
    print("Saved (text) {} (new={})".format(m, new))

    # Each attached file overwrites the previous one's type and link on the
    # same row, so the last file listed wins.
    for attachment in message.get("files", []):
        m.file_type = attachment["filetype"]
        m.perma_link = attachment["url_private_download"]
        m.save()
        print("Saved permalink {} to {} (possibly overwriting)".format(attachment["name"], m))

    return m if new else None
|
||||
|
||||
|
||||
def check_all_past_messages():
    """Fetch the full history of the archive channel and persist every message.

    The first page is requested directly; while the response reports
    "has_more", further pages are requested via the returned cursor. Returns
    None.
    """
    oldest_ts = 0
    retry_delay = 30

    page = client.conversations_history(
        channel=ARCHIVE_ID,
        oldest=oldest_ts,
    )  # fetches 100 messages by default

    # NOTE(review): this list is filled (including None entries) but never
    # returned or read afterwards.
    collected = [message_dict_to_model(raw) for raw in page.get("messages", [])]

    has_more = page.get("has_more", False)
    print(f"Refetching : {has_more}")
    while has_more:  # we have not actually fetched them all
        try:
            page = client.conversations_history(
                channel=ARCHIVE_ID,
                cursor=page["response_metadata"]["next_cursor"],
                oldest=oldest_ts,
            )  # refetches in batches of 100 messages
            has_more = page.get("has_more", False)
            collected.extend(
                message_dict_to_model(raw) for raw in page.get("messages", [])
            )
        except SlackApiError:  # Most likely a rate-limit
            # NOTE(review): every SlackApiError is retried indefinitely, not
            # only rate limits; `page` still holds the last successful
            # response, so the same cursor is requested again.
            print("Error while fetching channel messages. (likely rate limit) Retrying in {} seconds...".format(retry_delay))
            time.sleep(retry_delay)
            has_more = True
|
||||
|
||||
|
||||
check_all_past_messages()
|
@ -1,38 +0,0 @@
|
||||
from peewee import SqliteDatabase
|
||||
|
||||
from persistence import article_models, message_models
|
||||
|
||||
# Global logger setup:
|
||||
|
||||
|
||||
# Constant values...
|
||||
DOWNLOADS_DB = "../container_data/downloads.db"
|
||||
MESSAGES_DB = "../container_data/messages.db"
|
||||
|
||||
BOT_ID = "U02MR1R8UJH"
|
||||
ARCHIVE_ID = "C02MM7YG1V4"
|
||||
DEBUG_ID = "C02NM2H9J5Q"
|
||||
|
||||
|
||||
# DB Setup:
|
||||
article_models.set_db(SqliteDatabase(
|
||||
DOWNLOADS_DB,
|
||||
pragmas = {'journal_mode': 'wal'} # mutliple threads can access at once
|
||||
))
|
||||
|
||||
message_models.set_db(SqliteDatabase(MESSAGES_DB))
|
||||
|
||||
|
||||
|
||||
# For every stored reaction, mark the articles referenced by the reaction's
# thread as verified (1) for a "white_check_mark" and rejected (-1) otherwise.
for reaction in message_models.Reaction.select():
    print(reaction)
    referenced = message_models.get_referenced_articles(
        reaction.message.thread,
        article_models.ArticleDownload,
    )
    for entry in referenced:
        print(entry)

    if reaction.type == "white_check_mark":
        status = 1
    else:
        status = -1
    print(status)

    for entry in referenced:
        entry.verified = status
        entry.save()
|
@ -1,151 +0,0 @@
|
||||
[
|
||||
"https://www.swissinfo.ch/ger/wirtschaft/koennen-ki-und-direkte-demokratie-nebeneinander-bestehen-/47542048",
|
||||
"https://www.zeit.de/2011/33/CH-Oekonophysik",
|
||||
"https://ourworld.unu.edu/en/green-idea-self-organizing-traffic-signals",
|
||||
"https://www.youtube.com/watch?v=-FQD4ie9UYA",
|
||||
"https://www.brandeins.de/corporate-services/mck-wissen/mck-wissen-logistik/schwaermen-fuer-das-optimum",
|
||||
"https://www.youtube.com/watch?v=upQM4Xzh8zM",
|
||||
"https://www.youtube.com/watch?v=gAkoprZmW4k",
|
||||
"https://www.youtube.com/watch?v=VMzfDVAWXHI&t=1s",
|
||||
"https://www.youtube.com/watch?v=1SwTiIlkndE",
|
||||
"https://www.informatik-aktuell.de/management-und-recht/digitalisierung/digitale-revolution-und-oekonomie-40-quo-vadis.html",
|
||||
"https://www.youtube.com/watch?v=cSvvH0SBFOw",
|
||||
"https://www.linkedin.com/posts/margit-osterloh-24198a104_pl%C3%A4doyer-gegen-sprechverbote-ugcPost-6925702100450480129-K7Dl?utm_source=linkedin_share&utm_medium=member_desktop_web",
|
||||
"https://www.nebelspalter.ch/plaedoyer-gegen-sprechverbote",
|
||||
"https://falling-walls.com/people/dirk-helbing/",
|
||||
"https://digitalsensemaker.podigee.io/3-2-mit-dirk-helbing",
|
||||
"https://www.blick.ch/wirtschaft/musk-als-hueter-der-redefreiheit-eth-experte-sagt-musks-vorhaben-hat-potenzial-aber-id17437811.html",
|
||||
"https://www.trend.at/standpunkte/mit-verantwortung-zukunft-10082300",
|
||||
"https://www.pantarhei.ch/podcast/",
|
||||
"https://ethz.ch/en/industry/industry/news/data/2022/04/intelligent-traffic-lights-for-optimal-traffic-flow.html",
|
||||
"https://ethz.ch/de/wirtschaft/industry/news/data/2022/04/optimaler-verkehrsfluss-mit-intelligenten-ampeln.html",
|
||||
"https://www.spektrum.de/news/die-verschlungenen-wege-der-menschen/1181815",
|
||||
"https://www.pcwelt.de/a/diktatur-4-0-schoene-neue-digitalisierte-welt,3447005",
|
||||
"https://www.nzz.ch/english/cancel-culture-at-eth-a-professor-receives-death-threats-over-a-lecture-slide-ld.1675322",
|
||||
"https://www.brandeins.de/corporate-services/mck-wissen/mck-wissen-logistik/schwaermen-fuer-das-optimum",
|
||||
"https://www.achgut.com/artikel/ausgestossene_der_woche_prinz_william_als_immaginierter_rassist",
|
||||
"https://www.pinterpolitik.com/in-depth/klaim-big-data-luhut-perlu-diuji/",
|
||||
"https://www.srf.ch/kultur/gesellschaft-religion/eklat-an-der-eth-wenn-ein-angeblicher-schweinevergleich-zur-staatsaffaere-wird",
|
||||
"https://open.spotify.com/episode/6s1icdoplZeNOINvx6ZHTd?si=610a699eba004da2&nd=1",
|
||||
"https://www.nzz.ch/schweiz/shitstorm-an-der-eth-ein-professor-erhaelt-morddrohungen-ld.1673554",
|
||||
"https://www.nzz.ch/schweiz/shitstorm-an-der-eth-ein-professor-erhaelt-morddrohungen-ld.1673554",
|
||||
"https://djmag.com/features/after-astroworld-what-being-done-stop-crowd-crushes-happening-again",
|
||||
"https://prisma-hsg.ch/articles/meine-daten-deine-daten-unsere-daten/",
|
||||
"https://www.srf.ch/audio/focus/zukunftsforscher-dirk-helbing-die-welt-ist-keine-maschine?id=10756661",
|
||||
"https://www.20min.ch/story/roboter-fuer-hunde-machen-wenig-sinn-647302764916",
|
||||
"https://www.wienerzeitung.at/nachrichten/wissen/mensch/942890-Roboter-als-Praesidentschaftskandidaten.html",
|
||||
"https://disruptors.fm/11-building-a-crystal-ball-of-the-world-unseating-capitalism-and-creating-a-new-world-order-with-prof-dirk-helbing/",
|
||||
"https://www.spreaker.com/user/disruptorsfm/11-building-crystal-ball-of-the-world-un",
|
||||
"https://www.youtube.com/watch?v=fRkCMC3zqSQ",
|
||||
"https://arstechnica.com/science/2021/11/what-the-physics-of-crowds-can-tell-us-about-the-tragic-deaths-at-astroworld/",
|
||||
"https://www.fox23.com/news/trending/astroworld-festival-big-crowds-can-flow-like-liquid-with-terrifying-results/37QH6Q4RGFELHGCZSZTBV46STU/",
|
||||
"https://futurism.com/astroworld-theory-deaths-bodies-fluid",
|
||||
"https://www.businessinsider.com/why-people-died-astroworld-crowd-crush-physics-fluid-dynamics-2021-11",
|
||||
"https://theconversation.com/ten-tips-for-surviving-a-crowd-crush-112169",
|
||||
"https://www.limmattalerzeitung.ch/basel/das-wort-zum-tag-kopie-von-4-januar-hypotenuse-schlaegt-kathete-trivia-trampel-pandemie-ld.2233931",
|
||||
"https://magazine.swissinformatics.org/en/whats-wrong-with-ai/",
|
||||
"https://magazine.swissinformatics.org/en/whats-wrong-with-ai/",
|
||||
"https://www.netkwesties.nl/1541/wrr-ai-wordt-de-verbrandingsmotor-van.htm",
|
||||
"https://youtu.be/ptm9zLG2KaE",
|
||||
"https://www.deutschlandfunkkultur.de/die-zukunft-der-demokratie-mehr-teilhabe-von-unten-wagen.976.de.html?dram:article_id=468341",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://technikjournal.de/2017/08/02/ein-plaedoyer-fuer-die-digitale-demokratie/",
|
||||
"https://technikjournal.de/2017/08/02/ein-plaedoyer-fuer-die-digitale-demokratie/",
|
||||
"https://trafo.hypotheses.org/23989",
|
||||
"https://web.archive.org/web/20200609053329/https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/the-corona-crisis-reveals-the-struggle-for-a-sustainable-digital-future/",
|
||||
"https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/",
|
||||
"https://www.youtube.com/watch?v=gAkoprZmW4k",
|
||||
"https://www.rhein-zeitung.de/region/aus-den-lokalredaktionen/nahe-zeitung_artikel,-peter-flaschels-lebenswerk-hat-die-sozialgeschichte-beeinflusst-_arid,2322161.html",
|
||||
"https://www.blick.ch/wirtschaft/online-boom-ohne-ende-corona-befeuert-die-tech-revolution-id16359910.html",
|
||||
"https://www.nzz.ch/meinung/china-unterwirft-tech-und-social-media-das-geht-auch-europa-an-ld.1643010",
|
||||
"https://www.say.media/article/la-mort-par-algorithme",
|
||||
"https://www.suedostschweiz.ch/aus-dem-leben/2021-08-14/stau-ist-nicht-gleich-stau",
|
||||
"https://www.swissinfo.ch/eng/directdemocracy/political-perspectives_digital-democracy--too-risky--or-the-chance-of-a-generation-/43836222",
|
||||
"https://kow-berlin.com/exhibitions/illusion-einer-menschenmenge",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://www.politik-kommunikation.de/ressorts/artikel/eine-gefaehrliche-machtasymmetrie-1383558602",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://solutions.hamburg/ethik-und-digitalisierung-nicht-voneinander-getrennt-betrachten/",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://avenue.argusdatainsights.ch/Article/AvenueClip?artikelHash=d14d91ec9a8b4cb0b6bb3012c0cefd8b_27F0B19422F1F03723769C18906AA1EE&artikelDateiId=298862327",
|
||||
"https://www.tagblatt.ch/kultur/grosses-ranking-ihre-stimme-hat-gewicht-das-sind-die-50-profiliertesten-intellektuellen-der-schweiz-ld.2182261",
|
||||
"https://reliefweb.int/report/world/building-multisystemic-understanding-societal-resilience-covid-19-pandemic",
|
||||
"https://reliefweb.int/report/world/building-multisystemic-understanding-societal-resilience-covid-19-pandemic",
|
||||
"https://www.events.at/e/wie-wir-in-zukunft-leben-wollen-die-stadt-als-datenfeld",
|
||||
"https://www.events.at/e/wie-wir-in-zukunft-leben-wollen-die-stadt-als-datenfeld",
|
||||
"https://greennetproject.org/en/2018/11/27/prof-dirk-helbing-es-braucht-vor-allem-tolle-ideen-in-die-sich-die-leute-verlieben/",
|
||||
"https://www.hpcwire.com/2011/05/06/simulating_society_at_the_global_scale/",
|
||||
"https://www.technologyreview.com/2010/04/30/204005/europes-plan-to-simulate-the-entire-planet/",
|
||||
"https://komentare.sme.sk/c/22543617/smrt-podla-algoritmu.html",
|
||||
"https://komentare.sme.sk/c/22543617/smrt-podla-algoritmu.html",
|
||||
"https://www.confidencial.com.ni/opinion/muerte-por-algoritmo/",
|
||||
"https://www.nzz.ch/panorama/wie-kann-eine-massenpanik-verhindert-werden-ld.1614761",
|
||||
"https://www.20min.ch/story/roboter-fuer-hunde-machen-wenig-sinn-647302764916",
|
||||
"https://www.wienerzeitung.at/nachrichten/wissen/mensch/942890-Roboter-als-Praesidentschaftskandidaten.html",
|
||||
"https://www.srf.ch/audio/focus/zukunftsforscher-dirk-helbing-die-welt-ist-keine-maschine?id=10756661",
|
||||
"https://disruptors.fm/11-building-a-crystal-ball-of-the-world-unseating-capitalism-and-creating-a-new-world-order-with-prof-dirk-helbing/",
|
||||
"https://www.spreaker.com/user/disruptorsfm/11-building-crystal-ball-of-the-world-un",
|
||||
"https://www.youtube.com/watch?v=fRkCMC3zqSQ",
|
||||
"https://arstechnica.com/science/2021/11/what-the-physics-of-crowds-can-tell-us-about-the-tragic-deaths-at-astroworld/",
|
||||
"https://www.fox23.com/news/trending/astroworld-festival-big-crowds-can-flow-like-liquid-with-terrifying-results/37QH6Q4RGFELHGCZSZTBV46STU/",
|
||||
"https://futurism.com/astroworld-theory-deaths-bodies-fluid",
|
||||
"https://www.businessinsider.com/why-people-died-astroworld-crowd-crush-physics-fluid-dynamics-2021-11",
|
||||
"https://theconversation.com/ten-tips-for-surviving-a-crowd-crush-112169",
|
||||
"https://www.limmattalerzeitung.ch/basel/das-wort-zum-tag-kopie-von-4-januar-hypotenuse-schlaegt-kathete-trivia-trampel-pandemie-ld.2233931",
|
||||
"https://www.pantarhei.ch/podcast/",
|
||||
"https://www.focus.it/scienza/scienze/folla-fisica-modelli-simulazioni",
|
||||
"https://www.focus.it/scienza/scienze/folla-fisica-modelli-simulazioni",
|
||||
"https://www.netkwesties.nl/1541/wrr-ai-wordt-de-verbrandingsmotor-van.htm",
|
||||
"https://www.transformationbeats.com/de/transformation/digitale-gesellschaft/",
|
||||
"https://www.transformationbeats.com/de/transformation/digitale-gesellschaft/",
|
||||
"https://www.suedkurier.de/ueberregional/wirtschaft/Wie-uns-der-Staat-heimlich-erzieht-sogar-auf-dem-Klo;art416,8763904",
|
||||
"https://www.suedkurier.de/ueberregional/wirtschaft/Wie-uns-der-Staat-heimlich-erzieht-sogar-auf-dem-Klo;art416,8763904",
|
||||
"https://www.deutschlandfunkkultur.de/die-zukunft-der-demokratie-mehr-teilhabe-von-unten-wagen.976.de.html?dram:article_id=468341",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://trafo.hypotheses.org/23989",
|
||||
"https://web.archive.org/web/20200609053329/https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/the-corona-crisis-reveals-the-struggle-for-a-sustainable-digital-future/",
|
||||
"https://www.wiko-berlin.de/institution/projekte-kooperationen/projekte/working-futures/wiko-briefs-working-futures-in-corona-times/",
|
||||
"https://www.youtube.com/watch?v=gAkoprZmW4k",
|
||||
"https://futurium.de/de/gespraech/ranga-yogeshwar-1/ranga-yogeshwar-dirk-helbing-mit-musik-von-till-broenner",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://idw-online.de/en/news113518",
|
||||
"https://blmplus.de/die-digitalcharta-ist-erst-der-anfang-ein-szenario-von-dirk-helbing/",
|
||||
"https://www.risiko-dialog.ch/big-nudging-vom-computer-gelenkt-aber-wohin/",
|
||||
"https://idw-online.de/de/news13986",
|
||||
"https://www.uni-stuttgart.de/presse/archiv/uni-kurier/uk84_85/forschung/fw66.html",
|
||||
"https://www.infosperber.ch/medien/trends/rankings-oft-unbrauchbar-so-oder-so-aber-immer-schadlich/",
|
||||
"https://www.infosperber.ch/medien/trends/rankings-oft-unbrauchbar-so-oder-so-aber-immer-schadlich/",
|
||||
"https://www.nzz.ch/meinung/china-unterwirft-tech-und-social-media-das-geht-auch-europa-an-ld.1643010",
|
||||
"https://www.suedostschweiz.ch/aus-dem-leben/2021-08-14/stau-ist-nicht-gleich-stau",
|
||||
"https://www.swissinfo.ch/eng/directdemocracy/political-perspectives_digital-democracy--too-risky--or-the-chance-of-a-generation-/43836222",
|
||||
"https://werteundwandel.de/inhalte/d2030-in-aufbruchstimmung-fuer-eine-lebenswerte-zukunft/",
|
||||
"https://www.springer.com/gp/book/9783642240034",
|
||||
"https://www.springer.com/de/book/9783319908687",
|
||||
"https://www.youtube.com/watch?v=n9e77iYZPEY",
|
||||
"https://greennetproject.org/en/2018/11/27/prof-dirk-helbing-es-braucht-vor-allem-tolle-ideen-in-die-sich-die-leute-verlieben/",
|
||||
"https://www.hpcwire.com/2011/05/06/simulating_society_at_the_global_scale/",
|
||||
"https://www.say.media/article/la-mort-par-algorithme",
|
||||
"https://www.confidencial.com.ni/opinion/muerte-por-algoritmo/",
|
||||
"https://www.nzz.ch/panorama/wie-kann-eine-massenpanik-verhindert-werden-ld.1614761",
|
||||
"https://www.nesta.org.uk/report/digital-democracy-the-tools-transforming-political-engagement/",
|
||||
"https://www.nature.com/articles/news.2010.351",
|
||||
"https://www.focus.de/panorama/welt/tid-19265/gastkommentar-nutzt-die-moeglichkeiten-des-computers_aid_534372.html",
|
||||
"https://www.theglobalist.com/democracy-technology-innovation-society-internet/",
|
||||
"https://www.theglobalist.com/capitalism-democracy-technology-surveillance-privacy/",
|
||||
"https://www.theglobalist.com/google-artificial-intelligence-big-data-technology-future/",
|
||||
"https://www.theglobalist.com/fascism-big-data-artificial-intelligence-surveillance-democracy/",
|
||||
"https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/",
|
||||
"https://www.theglobalist.com/technology-society-sustainability-future-humanity/",
|
||||
"https://www.theglobalist.com/society-technology-peace-sustainability/",
|
||||
"https://www.theglobalist.com/democracy-technology-social-media-artificial-intelligence/",
|
||||
"https://www.theglobalist.com/financial-system-reform-economy-internet-of-things-capitalism/",
|
||||
"https://www.theglobalist.com/capitalism-society-equality-sustainability-crowd-funding/",
|
||||
"https://www.theglobalist.com/united-nations-world-government-peace-sustainability-society/",
|
||||
"https://www.theglobalist.com/world-economy-sustainability-environment-society/"
|
||||
]
|
@ -1,61 +0,0 @@
|
||||
import youtube_dl
|
||||
from waybackpy import WaybackMachineSaveAPI # upload to archive.org
|
||||
import time
|
||||
|
||||
|
||||
urls = [
|
||||
"https://id2020.org",
|
||||
"https://www.weforum.org/platforms/the-centre-for-cybersecurity",
|
||||
"https://www.unhcr.org/blogs/wp-content/uploads/sites/48/2018/04/fs.pdf",
|
||||
"https://en.wikipedia.org/wiki/Social_Credit_System",
|
||||
"https://en.wikipedia.org/wiki/Customer_lifetime_value",
|
||||
"https://www.weforum.org/reports/the-internet-of-bodies-is-here-tackling-new-challenges-of-technology-governance",
|
||||
"https://www.un.org/en/about-us/universal-declaration-of-human-rights",
|
||||
"https://www.biometricupdate.com/201909/id2020-and-partners-launch-program-to-provide-digital-id-with-vaccines",
|
||||
"https://www.wired.com/2008/06/pb-theory/",
|
||||
"https://www.medtechdive.com/news/fda-warns-of-false-positives-with-bd-coronavirus-diagnostic/581115/",
|
||||
"https://www.bbc.com/news/world-middle-east-52579475",
|
||||
"https://www.timesofisrael.com/over-12000-mistakenly-quarantined-by-phone-tracking-health-ministry-admits/",
|
||||
"https://www.delftdesignforvalues.nl",
|
||||
"https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/",
|
||||
"https://link.springer.com/chapter/10.1007/978-3-319-90869-4_17",
|
||||
"https://www.youtube.com/watch?v=_KhAsJRk2lo",
|
||||
"https://www.bloomberg.org/environment/supporting-sustainable-cities/american-cities-climate-challenge/",
|
||||
"https://climatecitycup.org",
|
||||
|
||||
]
|
||||
|
||||
def post_download_hook(ret_code):
    """Report where a finished download was written.

    ret_code is the status dict passed to the progress hook. When its
    'status' is 'finished', the value of its 'filename' key is printed;
    every other status is ignored.
    """
    finished = ret_code['status'] == 'finished'
    if finished:
        print(ret_code["filename"])
|
||||
|
||||
|
||||
def save_video(url):
    """Download the video behind url with youtube_dl.

    Requests the format selector 'best[height<=720]' and registers
    post_download_hook as progress hook. Exceptions raised by the download
    are caught and printed rather than propagated.
    """
    options = dict(
        format='best[height<=720]',
        progress_hooks=[post_download_hook],
        updatetime=False,
    )
    try:
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except Exception as e:
        print(f"Youtube download crashed: {e}")
|
||||
|
||||
|
||||
# for i, url in enumerate(urls):
|
||||
# print(f"Downloading video {i+1} / {len(urls)}")
|
||||
# save_video(url)
|
||||
|
||||
# Submit every url to the wayback machine, printing progress and the
# resulting archive url, and pause 20 seconds after each submission.
for position, url in enumerate(urls, start=1):
    print(f"Saving url {position} / {len(urls)}")
    user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" # needed?
    archive_url = WaybackMachineSaveAPI(url, user_agent).save()
    print(archive_url)
    time.sleep(20)
|
@ -2,10 +2,10 @@ FROM python:latest
|
||||
|
||||
ENV TZ Europe/Zurich
|
||||
|
||||
RUN mkdir -p /app/auto_news
|
||||
RUN mkdir -p /app/news_fetch
|
||||
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN python3 -m pip install -r /app/requirements.txt
|
||||
|
||||
COPY . /app/auto_news
|
||||
WORKDIR /app/auto_news
|
||||
COPY . /app/news_fetch
|
||||
WORKDIR /app/news_fetch
|
||||
|
@ -126,13 +126,12 @@ class Dispatcher(Thread):
|
||||
|
||||
|
||||
|
||||
# def manual_processing(self, articles, workers):
|
||||
# for w in workers:
|
||||
# w.start()
|
||||
|
||||
# for article in articles:
|
||||
# notifier = lambda article: logger.info(f"Completed manual actions for {article}")
|
||||
# ArticleWatcher(article, workers_manual = workers, notifier = notifier) # Article watcher wants a thread to link article to TODO: handle threads as a kwarg
|
||||
class PrintWorker:
    """Minimal worker that prints each article it is sent."""

    def send(self, article):
        """Print a confirmation line for article."""
        print(f"Uploaded article {article}")

    def keep_alive(self):
        """Sleep in one-second steps forever.

        Keeps the script running, because there is nothing else in the main
        thread.
        """
        while True:
            sleep(1)
|
||||
|
||||
|
||||
|
||||
@ -140,11 +139,6 @@ if __name__ == "__main__":
|
||||
dispatcher = Dispatcher()
|
||||
|
||||
if "upload" in sys.argv:
|
||||
class PrintWorker:
|
||||
def send(self, article):
|
||||
print(f"Uploaded article {article}")
|
||||
def keep_alive(self): # keeps script running, because there is nothing else in the main thread
|
||||
while True: sleep(1)
|
||||
|
||||
# peewee conditions must be combined with `|`, each side in parentheses. Python's
# `or` does not build an SQL OR; it returns one of its operands, so one of the
# two conditions was silently dropped from the query.
articles = models.ArticleDownload.select().where((models.ArticleDownload.archive_url == "") | (models.ArticleDownload.archive_url == "TODO:UPLOAD")).execute()
|
||||
logger.info(f"Launching upload to archive for {len(articles)} articles.")
|
||||
|
Loading…
x
Reference in New Issue
Block a user