Fixed a bug where duplicate threads would not get a response

This commit is contained in:
Remy Moll
2022-04-26 10:32:41 +02:00
parent 024da446e7
commit 246729d376
6 changed files with 56 additions and 41 deletions

View File

@@ -136,12 +136,11 @@ class PDFDownloader:
hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")]
except:
hrefs = []
old = hrefs
len_old = len(hrefs)
hrefs = [h for h in hrefs \
if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0)
] # filter a tiny bit at least
diff = set(old) ^ set(hrefs)
self.logger.info(f"Removed {len(diff)} hrefs: {diff} (before:{len(old)}, after: {len(hrefs)})")
self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})")
return hrefs