"""Archive a fixed list of reference URLs on the Wayback Machine.

Running this file as a script submits every entry of ``urls`` to the
Wayback Machine save API and prints the resulting archive URL, pausing
between requests. ``save_video`` is an optional helper that downloads a
video locally with youtube_dl; it is not called by default.
"""

import time

import youtube_dl
from waybackpy import WaybackMachineSaveAPI  # upload to archive.org

urls = [
    "https://id2020.org",
    "https://www.weforum.org/platforms/the-centre-for-cybersecurity",
    "https://www.unhcr.org/blogs/wp-content/uploads/sites/48/2018/04/fs.pdf",
    "https://en.wikipedia.org/wiki/Social_Credit_System",
    "https://en.wikipedia.org/wiki/Customer_lifetime_value",
    "https://www.weforum.org/reports/the-internet-of-bodies-is-here-tackling-new-challenges-of-technology-governance",
    "https://www.un.org/en/about-us/universal-declaration-of-human-rights",
    "https://www.biometricupdate.com/201909/id2020-and-partners-launch-program-to-provide-digital-id-with-vaccines",
    "https://www.wired.com/2008/06/pb-theory/",
    "https://www.medtechdive.com/news/fda-warns-of-false-positives-with-bd-coronavirus-diagnostic/581115/",
    "https://www.bbc.com/news/world-middle-east-52579475",
    "https://www.timesofisrael.com/over-12000-mistakenly-quarantined-by-phone-tracking-health-ministry-admits/",
    "https://www.delftdesignforvalues.nl",
    "https://www.theglobalist.com/technology-big-data-artificial-intelligence-future-peace-rooms/",
    "https://link.springer.com/chapter/10.1007/978-3-319-90869-4_17",
    "https://www.youtube.com/watch?v=_KhAsJRk2lo",
    "https://www.bloomberg.org/environment/supporting-sustainable-cities/american-cities-climate-challenge/",
    "https://climatecitycup.org",
]

# User agent sent with every save request.
# NOTE(review): the original author was unsure whether this is needed.
USER_AGENT = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"

# Pause between consecutive save requests, in seconds.
SAVE_DELAY_SECONDS = 20


def post_download_hook(ret_code):
    """Print the path of a finished download.

    Registered as a youtube_dl progress hook by ``save_video``.

    Args:
        ret_code: Status dictionary passed in by youtube_dl. Only the
            ``"status"`` and ``"filename"`` keys are read here.
    """
    if ret_code["status"] == "finished":
        file_loc = ret_code["filename"]
        print(file_loc)


def save_video(url):
    """Download the video at ``url`` with youtube_dl, at most 720p.

    Failures are reported on stdout instead of being raised, so a broken
    link does not stop a batch of downloads.

    Args:
        url: Address of the page hosting the video.
    """
    ydl_opts = {
        "format": "best[height<=720]",
        # 'outtmpl': f"{file_path}.%(ext)s",  # custom output name; the
        # extension would depend on the downloaded format
        "progress_hooks": [post_download_hook],
        "updatetime": False,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            # post_download_hook prints the file name once the download ends.
            ydl.download([url])
    except Exception as e:
        # Deliberately broad: any failure is logged and the caller moves on.
        print(f"Youtube download crashed: {e}")


def main():
    """Submit every URL in ``urls`` to the Wayback Machine and print results.

    A failed save is reported and skipped so that the remaining URLs are
    still processed.
    """
    # Optional: also keep local copies of the videos behind the URLs.
    # for i, url in enumerate(urls):
    #     print(f"Downloading video {i+1} / {len(urls)}")
    #     save_video(url)

    total = len(urls)
    for i, url in enumerate(urls, start=1):
        print(f"Saving url {i} / {total}")
        try:
            archive_url = WaybackMachineSaveAPI(url, USER_AGENT).save()
        except Exception as e:
            # Best-effort batch: one bad URL must not abort the others.
            print(f"Wayback save failed for {url}: {e}")
        else:
            print(archive_url)
        # No need to wait after the final request.
        if i < total:
            time.sleep(SAVE_DELAY_SECONDS)


if __name__ == "__main__":
    main()