"""
Save the websites listed in 'batch_urls.txt' (one URL per line) to the
Wayback Machine and print each resulting archive URL to the terminal.

Hint: run 'python batch_archive.py > batch_archive.txt' to save the output
to a file.
"""
|
|
from waybackpy import WaybackMachineSaveAPI # upload to archive.org
|
|
import time
|
|
|
|
# File that lists the pages to archive, one URL per line.
URLS_FILE = "batch_urls.txt"

# User agent passed to every save request. The original author was unsure
# whether it is actually required ("needed?"); kept unchanged.
USER_AGENT = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"

# Uploads to archive.org are rate limited, so pause between save requests.
SAVE_DELAY_SECONDS = 20


def read_urls(path=URLS_FILE):
    """Return the URLs listed in the file at *path*, one per line.

    Leading/trailing whitespace is stripped from every line -- including
    the trailing newline that ``readlines()`` leaves on each entry -- and
    blank lines are skipped, so only non-empty URLs are returned.

    Args:
        path: Text file to read. Defaults to ``batch_urls.txt``.

    Returns:
        A list of non-empty URL strings in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path, "r") as f:
        stripped = (line.strip() for line in f)
        return [url for url in stripped if url]


def main():
    """Save every URL from ``batch_urls.txt`` and print its archive URL."""
    urls = read_urls()
    total = len(urls)
    for i, url in enumerate(urls):
        print(f"Saving url {i+1} / {total}")
        wayback = WaybackMachineSaveAPI(url, USER_AGENT)
        archive_url = wayback.save()
        print(archive_url)
        # Only wait when another upload follows; sleeping after the last
        # URL just delays the script's exit.
        if i + 1 < total:
            time.sleep(SAVE_DELAY_SECONDS)


if __name__ == "__main__":
    main()