new component - upload to NAS
This commit is contained in:
		
							
								
								
									
										62
									
								
								news_fetch/app/utils_worker/fetch/runner.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								news_fetch/app/utils_worker/fetch/runner.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| from newspaper import Article | ||||
| from urllib.parse import urlparse | ||||
| from htmldate import find_date | ||||
| import datetime | ||||
| import logging | ||||
# Library loggers that are too chatty at their default level: raise their
# threshold so that only errors get through.
_NOISY_LOGGERS = (
    'newspaper',
    'urllib',
    'urllib3.poolmanager',
    'htmldate',
    'charset_normalizer',
)
for _noisy_name in _NOISY_LOGGERS:
    logging.getLogger(_noisy_name).setLevel(logging.ERROR)

# Logger used by the fetch worker itself.
logger = logging.getLogger("fetch")
|  | ||||
|  | ||||
def get_description(article_object):
    """Fill in the descriptive metadata of ``article_object`` from its URL.

    Reads ``article_object.article_url`` and sets, in place:

    * ``source_name`` - network location (host) part of the URL.
    * ``pub_date`` - date reported by ``find_date`` as a ``datetime``;
      ``datetime.datetime(1900, 1, 1)`` when no date could be determined.
    * ``title`` - title of the parsed article, or the placeholder
      ``"Error while running fetch"`` when download/parsing failed.
    * ``summary`` - the parsed article's own summary when it has one,
      otherwise the first 500 characters of its text followed by ``"..."``,
      otherwise ``""``.
    * ``language`` - ``meta_lang`` of the parsed article, or ``""``.

    When the article could be parsed, its ``authors`` and ``keywords`` are
    additionally passed to ``article_object.set_authors`` and
    ``article_object.set_keywords``.

    Args:
        article_object: Mutable object exposing ``article_url`` and, for the
            success path, ``set_authors`` / ``set_keywords``.

    Returns:
        The same ``article_object`` that was passed in.
    """
    # Same logger object the module binds to `logger` (both are "fetch").
    log = logging.getLogger("fetch")
    fallback_date = datetime.datetime(year=1900, month=1, day=1)
    max_excerpt_chars = 500

    url = article_object.article_url
    article_object.source_name = urlparse(url).netloc

    # find_date yields an ISO-style "YYYY-MM-DD" string by default (None when
    # nothing is found), so it must be parsed as year-month-day.
    try:
        pub_date = datetime.datetime.strptime(find_date(url), "%Y-%m-%d")
    except Exception:  # no date found, unsupported file type, network error...
        log.debug("No publication date for %s", url, exc_info=True)
        pub_date = fallback_date
    article_object.pub_date = pub_date

    try:
        news_article = Article(url)
        news_article.download()
        news_article.parse()
    except Exception as err:
        log.warning("Could not download/parse %s: %s", url, err)
        news_article = None  # every getattr below falls back to its default

    article_object.title = getattr(
        news_article, "title", "Error while running fetch"
    )

    # Prefer the summary extracted from the page; otherwise fall back to an
    # excerpt of the body text (slicing already clamps to the text length).
    # NOTE(review): newspaper presumably only fills `summary`/`keywords` after
    # `nlp()` has run, which is not called here - confirm.
    summary = getattr(news_article, "summary", "")
    text = getattr(news_article, "text", "")
    if summary:
        article_object.summary = summary
    elif text:
        article_object.summary = text[:max_excerpt_chars] + "..."
    else:
        article_object.summary = ""

    article_object.language = getattr(news_article, "meta_lang", "")

    # Without a parsed article there is nothing to record for these lists.
    authors = getattr(news_article, "authors", None)
    if authors is not None:
        article_object.set_authors(authors)

    keywords = getattr(news_article, "keywords", None)
    if keywords is not None:
        article_object.set_keywords(keywords)

    return article_object
		Reference in New Issue
	
	Block a user
	 Remy Moll
					Remy Moll