Working and up to date. WIP misc manual actions
This commit is contained in:
		@@ -11,61 +11,52 @@ logging.getLogger('charset_normalizer').setLevel(logging.ERROR) #quieter logs
 | 
			
		||||
# Module-level logger registered under the name "fetch".
logger = logging.getLogger("fetch")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NewspaperDummy:
    """Placeholder exposing the article attributes read after a fetch.

    Every text field carries the same error message, the language is empty
    and the author/keyword lists are empty, so an instance can stand in for
    a parsed article when fetching or parsing failed.
    """

    # One shared message for all free-text fields.
    title = summary = text = "Error while running fetch"
    meta_lang = ""
    # Class-level lists: shared by all instances, so treat them as read-only.
    authors = []
    keywords = []
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_description(article_object):
    """Fill in the metadata of ``article_object`` from the page at its URL.

    The source name is the network location of ``article_object.article_url``.
    The publication date comes from ``find_date``; title, summary, language,
    authors and keywords come from downloading and parsing the page with
    ``Article``. Every lookup is best-effort: a failure leaves a placeholder
    value on ``article_object`` instead of raising.

    Args:
        article_object: Object exposing ``article_url`` as well as the
            ``set_authors`` and ``set_keywords`` methods. Its ``source_name``,
            ``pub_date``, ``title``, ``summary`` and ``language`` attributes
            are overwritten.

    Returns:
        The same ``article_object``, updated in place.
    """
    url = article_object.article_url
    article_object.source_name = urlparse(url).netloc

    try:
        # NOTE(review): assumes find_date returns a 'YYYY-MM-DD' string (or
        # None when nothing is found) -- confirm against the imported helper.
        # The previous format '%Y-%d-%M' read the month as the day and the
        # day as minutes, so every parsed date was wrong.
        article_object.pub_date = datetime.datetime.strptime(find_date(url), "%Y-%m-%d")
    except Exception:  # no date found, or other file types
        article_object.pub_date = datetime.datetime(year=1900, month=1, day=1)

    try:
        news_article = Article(url)
        news_article.download()
        news_article.parse()
    except Exception:
        # Download/parsing failed: every getattr below falls back to its default.
        news_article = None

    article_object.title = getattr(news_article, "title", "Error while running fetch")

    # Prefer the parsed summary; otherwise use the first 500 characters of the
    # body text. The check is on the parsed article, not on article_object,
    # whose old summary must not decide which branch is taken.
    summary = getattr(news_article, "summary", "")
    text = getattr(news_article, "text", "")
    if not summary and text:
        summary = text[:500] + "..."
    article_object.summary = summary or ""

    article_object.language = getattr(news_article, "meta_lang", "")

    # Authors/keywords are only forwarded when the parsed article has them;
    # without them the setters are left untouched.
    authors = getattr(news_article, "authors", None)
    if authors is not None:
        article_object.set_authors(authors)

    keywords = getattr(news_article, "keywords", None)
    if keywords is not None:
        article_object.set_keywords(keywords)

    return article_object
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user