""" Extracts all urls from a list of mails exported from thunderbird. Writes to 'mails_url_export.json' """ import os import re import json os.chdir("/home/remy/Downloads/mails/") regex = "(?Phttps?://[^\s]+)" all_files = os.listdir(".") all_urls = [] for f in all_files: with open(f, "r", encoding="utf8") as mail: content = mail.readlines() search = "".join(content) urls = re.findall(regex, search) all_urls += urls print("Saved {} urls".format(len(all_urls))) with open("mails_url_export.json", "w") as f: json.dump(all_urls, f)