coss_archiving/manual/batch_archive.py

22 lines
699 B
Python

"""
Save the websites listed in 'batch_urls.txt' (one URL per line) to the Wayback Machine and print the resulting archive URLs to the terminal.
Hint: use 'python batch_archive.py > batch_archive.txt' to save the output to a file.
"""
from waybackpy import WaybackMachineSaveAPI # upload to archive.org
import time
# Input file with the URLs to archive, one per line.
URLS_FILE = "batch_urls.txt"
# Browser-like user agent handed to the Save API (the original author was
# unsure whether it is actually needed).
USER_AGENT = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
# Pause between consecutive saves: uploads to archive.org are rate limited.
SAVE_DELAY_SECONDS = 20

# Strip surrounding whitespace (including the trailing newline) and drop blank
# lines so that no empty URL is submitted and the progress count is accurate.
with open(URLS_FILE, "r", encoding="utf-8") as f:
    urls = [line.strip() for line in f if line.strip()]

for i, url in enumerate(urls):
    # Wait *between* saves only; there is nothing to throttle before the
    # first request or after the last one.
    if i:
        time.sleep(SAVE_DELAY_SECONDS)
    print(f"Saving url {i+1} / {len(urls)}")
    wayback = WaybackMachineSaveAPI(url, USER_AGENT)
    archive_url = wayback.save()
    print(archive_url)