reduced slack functionality, higher ease of use. Database migration wip
This commit is contained in:
		
							
								
								
									
										57
									
								
								news_fetch/utils_worker/fetch/runner.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								news_fetch/utils_worker/fetch/runner.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
from newspaper import Article
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
from htmldate import find_date
 | 
			
		||||
import datetime
 | 
			
		||||
import logging
 | 
			
		||||
# Third-party loggers that are too chatty below ERROR level; silence them so
# this module's own output stays readable.
_QUIET_LOGGER_NAMES = (
    'newspaper',
    'urllib',
    'urllib3.poolmanager',
    'htmldate',
    'charset_normalizer',
)
for _quiet_name in _QUIET_LOGGER_NAMES:
    logging.getLogger(_quiet_name).setLevel(logging.ERROR)

# Logger used by the fetch worker itself.
logger = logging.getLogger("fetch")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_description(article_object):
    """Fill in the metadata of ``article_object`` from the page at its URL.

    Reads ``article_object.article_url`` and sets ``source_name``,
    ``pub_date``, ``title``, ``summary`` and ``language`` on the object, then
    hands the author list to ``article_object.set_authors``. Every lookup is
    best-effort: when a step fails, a placeholder value is stored instead of
    letting the error propagate.

    Args:
        article_object: Object exposing ``article_url`` and a ``set_authors``
            method. It is mutated in place.

    Returns:
        The same ``article_object`` that was passed in.
    """
    url = article_object.article_url
    article_object.source_name = urlparse(url).netloc

    try:
        # htmldate's default output is an ISO 'YYYY-MM-DD' string, so the
        # format must be year-month-day ('%M' would mean minutes). A missing
        # date (non-string result) makes strptime raise and lands in the
        # fallback below.
        article_object.pub_date = datetime.datetime.strptime(find_date(url), '%Y-%m-%d')
    except Exception:  # other file types, unreachable pages, no date found
        article_object.pub_date = datetime.datetime(year=1900, month=1, day=1)

    try:
        news_article = Article(url)
        news_article.download()
        news_article.parse()
    except Exception:
        # Attribute-less stand-in: every lookup below raises AttributeError
        # and therefore takes its fallback branch.
        news_article = object()

    try:
        article_object.title = news_article.title
    except AttributeError:
        article_object.title = "Error while running fetch"

    try:
        # Decide based on what was fetched, not on what the target object
        # already holds.
        # NOTE(review): newspaper seems to populate ``summary`` only after
        # ``nlp()`` has run - confirm; otherwise the text excerpt is used.
        if news_article.summary:
            article_object.summary = news_article.summary
        elif news_article.text:
            # Slicing already clamps to the text length.
            article_object.summary = news_article.text[:500] + "..."
        else:
            article_object.summary = ""
    except AttributeError:
        article_object.summary = ""

    try:
        article_object.language = news_article.meta_lang
    except AttributeError:
        article_object.language = ""

    try:
        article_object.set_authors(news_article.authors)
    except AttributeError:
        pass  # fetch failed, so there are no authors to record

    return article_object
 | 
			
		||||
		Reference in New Issue
	
	Block a user