import datetime
import logging
from urllib.parse import urlparse

from htmldate import find_date
from newspaper import Article

# The libraries used for fetching are chatty; only let their errors through.
logging.getLogger('newspaper').setLevel(logging.ERROR)  # quieter logs
logging.getLogger('urllib').setLevel(logging.ERROR)  # quieter logs
logging.getLogger('urllib3.poolmanager').setLevel(logging.ERROR)  # quieter logs
logging.getLogger('htmldate').setLevel(logging.ERROR)  # quieter logs
logging.getLogger('charset_normalizer').setLevel(logging.ERROR)  # quieter logs
logger = logging.getLogger("fetch")

# Placeholder stored in ``pub_date`` when no publication date can be determined.
_FALLBACK_PUB_DATE = datetime.datetime(year=1900, month=1, day=1)
# Maximum number of characters of article text used as a stand-in summary.
_SUMMARY_MAX_CHARS = 500
# Title stored when the article could not be downloaded or parsed.
_FALLBACK_TITLE = "Error while running fetch"


def get_description(article_object):
    """Fill in metadata on *article_object* from the page at its URL.

    Reads ``article_object.article_url`` and sets ``source_name``,
    ``pub_date``, ``title``, ``summary`` and ``language`` on the same object.
    Authors are handed to ``article_object.set_authors``.

    Every step is best-effort: a failed date lookup or a failed
    download/parse never raises, it only results in placeholder values
    (1900-01-01 for the date, an error title, empty strings elsewhere).

    Args:
        article_object: Mutable object exposing ``article_url`` and
            ``set_authors``; it is modified in place.

    Returns:
        The same ``article_object`` that was passed in.
    """
    url = article_object.article_url
    article_object.source_name = urlparse(url).netloc

    try:
        # find_date() returns a "YYYY-MM-DD" string by default, or None when
        # nothing is found (strptime then raises and we fall back below).
        article_object.pub_date = datetime.datetime.strptime(
            find_date(url), '%Y-%m-%d'
        )
    except Exception:  # other file types, unreachable URL, no date found
        logger.debug("No publication date for %s", url, exc_info=True)
        article_object.pub_date = _FALLBACK_PUB_DATE

    try:
        news_article = Article(url)
        news_article.download()
        news_article.parse()
    except Exception:
        logger.debug("Could not fetch/parse %s", url, exc_info=True)
        news_article = None  # every getattr() below then yields its default

    article_object.title = getattr(news_article, "title", _FALLBACK_TITLE)

    # Prefer the parser's own summary; otherwise use the start of the text.
    summary = getattr(news_article, "summary", "")
    text = getattr(news_article, "text", "")
    if summary:
        article_object.summary = summary
    elif text:
        article_object.summary = text[:_SUMMARY_MAX_CHARS] + "..."
    else:
        article_object.summary = ""

    article_object.language = getattr(news_article, "meta_lang", "")

    try:
        article_object.set_authors(news_article.authors)
    except AttributeError:
        pass  # list would have been empty anyway

    return article_object