Fixed bug where duplicate threads would not get a response
This commit is contained in:
@@ -136,12 +136,11 @@ class PDFDownloader:
|
||||
hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")]
|
||||
except:
|
||||
hrefs = []
|
||||
old = hrefs
|
||||
len_old = len(hrefs)
|
||||
hrefs = [h for h in hrefs \
|
||||
if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0)
|
||||
] # filter a tiny bit at least
|
||||
diff = set(old) ^ set(hrefs)
|
||||
self.logger.info(f"Removed {len(diff)} hrefs: {diff} (before:{len(old)}, after: {len(hrefs)})")
|
||||
self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})")
|
||||
return hrefs
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user