58 lines
1.8 KiB
Python

from newspaper import Article
from urllib.parse import urlparse
from htmldate import find_date
import datetime
import logging
# The third-party libraries involved in fetching are chatty; cap each of
# their loggers at ERROR so that only real failures are reported.
for _noisy_logger in (
    'newspaper',
    'urllib',
    'urllib3.poolmanager',
    'htmldate',
    'charset_normalizer',
):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
del _noisy_logger  # do not leave the loop variable in the module namespace

# Module logger, registered under the name "fetch".
logger = logging.getLogger("fetch")
def get_description(article_object):
    """Fill in the metadata of *article_object* from the page at its URL.

    Reads ``article_object.article_url`` and sets, on the same object:

    * ``source_name`` -- the network location (host) part of the URL.
    * ``pub_date`` -- publication date reported by ``find_date`` as a
      ``datetime.datetime``; 1900-01-01 when no date can be determined.
    * ``title`` -- the parsed article title, or
      ``"Error while running fetch"`` when download/parsing fails.
    * ``summary`` -- the parsed article's summary if it has one, otherwise
      the first 500 characters of its text followed by ``"..."``, otherwise
      the empty string.
    * ``language`` -- the parsed article's ``meta_lang``, or ``""`` when
      download/parsing fails.

    When the article was parsed successfully, its author list is also passed
    to ``article_object.set_authors``.

    Failures while looking up the date or while downloading/parsing the
    article are not raised; the fallback values above are used instead.

    Returns:
        The same ``article_object`` that was passed in.
    """
    url = article_object.article_url
    article_object.source_name = urlparse(url).netloc

    # find_date() yields an ISO-style "YYYY-MM-DD" string by default, or
    # None when nothing is found (strptime then raises TypeError, which is
    # handled below together with any fetch/parse failure).
    try:
        article_object.pub_date = datetime.datetime.strptime(
            find_date(url), '%Y-%m-%d'
        )
    except Exception:  # no date found, unsupported file type, fetch error
        article_object.pub_date = datetime.datetime(year=1900, month=1, day=1)

    try:
        news_article = Article(url)
        news_article.download()
        news_article.parse()
    except Exception:
        news_article = None  # marks "nothing could be fetched"

    if news_article is None:
        article_object.title = "Error while running fetch"
        article_object.summary = ""
        article_object.language = ""
        # Authors are left untouched: there is no list to pass on.
        return article_object

    article_object.title = news_article.title

    # NOTE(review): newspaper appears to populate ``summary`` only after
    # ``Article.nlp()`` has run, which is not called here -- so the text
    # excerpt is presumably what gets used in practice. Confirm whether
    # ``nlp()`` should be invoked.
    if news_article.summary:
        article_object.summary = news_article.summary
    elif news_article.text:
        # Slicing already copes with texts shorter than 500 characters.
        article_object.summary = news_article.text[:500] + "..."
    else:
        article_object.summary = ""

    article_object.language = news_article.meta_lang
    article_object.set_authors(news_article.authors)
    return article_object