coss_archiving/misc/exctract_from_mail_backup.py
2022-06-23 15:05:59 +02:00

23 lines
494 B
Python

import os
import re
import json
# Extract every http(s) URL from a directory of exported mail files and dump
# them, as one flat JSON list, into a file inside that same directory.

MAIL_DIR = "/home/remy/Downloads/mails/"
OUTPUT_NAME = "media_mails_export.json"

# Raw string: "\s" must reach the regex engine untouched; in a normal string
# literal it is an invalid escape sequence that newer Pythons warn about.
URL_PATTERN = re.compile(r"(?P<url>https?://[^\s]+)")


def extract_urls(text):
    """Return all http(s) URLs found in *text*, in order of appearance.

    A URL is "http://" or "https://" followed by every character up to the
    next whitespace. Duplicates are kept.
    """
    return URL_PATTERN.findall(text)


def collect_urls(mail_dir, skip=(OUTPUT_NAME,)):
    """Gather the URLs of every regular file directly inside *mail_dir*.

    Args:
        mail_dir: Directory holding the mail files (not searched recursively).
        skip: File names to ignore. Defaults to the export file, which lives
            in the same directory and would otherwise be parsed as a mail on
            the next run, feeding its own URLs back into the result.

    Returns:
        A list of URL strings, duplicates included.

    Raises:
        OSError: If *mail_dir* or one of its files cannot be read.
    """
    all_urls = []
    # os.listdir() returns names in arbitrary order; sorting keeps the export
    # reproducible from one run to the next.
    for name in sorted(os.listdir(mail_dir)):
        path = os.path.join(mail_dir, name)
        # Sub-directories cannot be opened as text files, so skip them.
        if name in skip or not os.path.isfile(path):
            continue
        # errors="replace": one stray non-UTF-8 byte in a mail must not abort
        # the whole export. The replacement character is not whitespace, so
        # it only affects a URL that itself contains the undecodable byte.
        with open(path, "r", encoding="utf8", errors="replace") as mail:
            all_urls += extract_urls(mail.read())
    return all_urls


def main(mail_dir=MAIL_DIR, output_name=OUTPUT_NAME):
    """Export the URLs found in *mail_dir* to *output_name* inside it.

    Returns:
        The list of URLs that was written.
    """
    all_urls = collect_urls(mail_dir, skip=(output_name,))
    with open(os.path.join(mail_dir, output_name), "w", encoding="utf8") as out:
        json.dump(all_urls, out)
    # Report only once the file has actually been written.
    print("Saved {} urls".format(len(all_urls)))
    return all_urls


if __name__ == "__main__":
    main()