Fixed bug where duplicate threads would not get a response
This commit is contained in:
		| @@ -136,12 +136,11 @@ class PDFDownloader: | ||||
|             hrefs = [e.get_attribute("href") for e in self.driver.find_elements_by_xpath("//a[@href]")] | ||||
|         except: | ||||
|             hrefs = [] | ||||
|         old = hrefs | ||||
|         len_old = len(hrefs) | ||||
|         hrefs = [h for h in hrefs \ | ||||
|             if not sum([(domain in h) for domain in blacklisted]) # sum([True, False, False, False]) == 1 (esp. not 0) | ||||
|             ] # filter a tiny bit at least | ||||
|         diff = set(old) ^ set(hrefs) | ||||
|         self.logger.info(f"Removed {len(diff)} hrefs: {diff} (before:{len(old)}, after: {len(hrefs)})") | ||||
|         self.logger.info(f"Hrefs result (before:{len_old}, after: {len(hrefs)})") | ||||
|         return hrefs | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remy Moll
					Remy Moll