23 lines
489 B
Python
23 lines
489 B
Python
import os
|
|
import re
|
|
import json
|
|
|
|
# Scan every file in the mail directory for http(s) URLs and export the
# collected list as JSON into that same directory.
os.chdir("/home/remy/Documents/mails2/")

# Raw string: "\s" in a plain literal is an invalid string escape that newer
# Python versions warn about. The pattern itself is unchanged.
regex = r"(?P<url>https?://[^\s]+)"
export_name = "mails_export.json"

# Only regular files can be opened, and the export written by a previous run
# lives in this same directory, so it must not be scanned as if it were a mail.
all_files = [
    name
    for name in os.listdir(".")
    if os.path.isfile(name) and name != export_name
]
all_urls = []

for name in all_files:
    # errors="replace" keeps one undecodable byte from aborting the whole run.
    with open(name, "r", encoding="utf8", errors="replace") as mail:
        # The text is only ever searched as a single string, so read it at once.
        all_urls += re.findall(regex, mail.read())

print("Saved {} urls".format(len(all_urls)))

with open(export_name, "w") as out:
    json.dump(all_urls, out)