2022-03-15 08:05:28 +00:00
"""
Script for fetching news (by keywords)
"""
__author__ = " Florian Kellermann, Linus Eickhoff "
2022-04-26 10:46:20 +00:00
__date__ = " 26.04.2022 "
2022-05-11 21:33:48 +00:00
__version__ = " 1.0.0 "
2022-03-15 09:39:29 +00:00
__license__ = " None "
2022-03-15 13:01:33 +00:00
import os
2022-05-11 21:33:48 +00:00
import sys
2022-03-15 13:01:33 +00:00
2022-05-10 16:43:41 +00:00
import helper_functions as hf
2022-05-11 21:33:48 +00:00
import requests
2022-03-15 13:01:33 +00:00
from dotenv import load_dotenv
2022-05-11 21:33:48 +00:00
from newsapi import NewsApiClient
2022-03-15 13:01:33 +00:00
2022-05-11 21:33:48 +00:00
load_dotenv()  # loads environment vars

# Init
api_key = os.getenv('NEWS_API_KEY')  # get API Key from .env file
newsapi = NewsApiClient(api_key=api_key)  # news api from https://newsapi.org/

# Build the comma-separated list of source ids that the fetch functions pass as
# their `sources` argument. The list is requested once, at import time.
try:
    # get all available news sources (e.g BBC, New York Times, etc.)
    # timeout: without it a stalled connection would block the import forever
    source_json = requests.get(
        f"https://newsapi.org/v2/top-headlines/sources?apiKey={api_key}&language=en",
        timeout=10,
    ).json()
    sources = source_json["sources"]
    str_sources = ",".join([source["id"] for source in sources])
except (KeyError, ValueError, requests.RequestException):
    # KeyError: response has no "sources"/"id" entry (e.g. an error payload)
    # ValueError: response body was not valid JSON
    # RequestException: connection problem or timeout
    # In every case fall back to a hard-coded list so the module stays importable.
    print("Error: Could not get sources, may be blocked because of too many requests (free newsapi is limited to 100 reqs per day)")
    str_sources = str(
        "Reuters, bbc, cnn, fox-news, google-news, hacker-news, nytimes, the-huffington-post, the-new-york-times, business-insider, bbc-news, cbc-news, ESPN, fox-sports, google-news-uk, independent, the-wall-street-journal, the-washington-times, time, usa-today")
2022-03-15 10:27:54 +00:00
2022-04-12 08:12:35 +00:00
def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """Search all news for a keyword

    Args:
        keyword (String): keyword for search; keywords can be combined
            with OR (e.g. keyword = "bitcoin OR ethereum")
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok",
            otherwise None
    """
    response = newsapi.get_everything(
        q=keyword,
        sources=str_sources,
        language='en',
        from_param=from_date,
    )
    # hand the response back only when the API reported success
    return response if response["status"] == "ok" else None
def get_top_news_by_keyword(keyword):
    """Fetch the top headlines for a keyword

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok",
            otherwise None
    """
    # top headlines, measured by popularity from NewsApi
    response = newsapi.get_top_headlines(
        q=keyword,
        sources=str_sources,
        language='en',
    )
    # hand the response back only when the API reported success
    return response if response["status"] == "ok" else None
2022-03-15 12:54:08 +00:00
def format_article(article):
    """Turn an article into a message text (using markdown syntax)

    Args:
        article (dict): article to format for messaging; its source name,
            title and url are read

    Returns:
        String: formatted article (source name in underscores, title in
            asterisks, then a blank line and the url)
    """
    # run every attribute through the helper so it is markdownv2 proof
    source_name = hf.make_markdown_proof(article["source"]["name"])
    title = hf.make_markdown_proof(article["title"])
    link = hf.make_markdown_proof(article["url"])

    # formatting in Markdown syntax
    return f"_{source_name}_\n*{title}*\n\n{link}"
2022-05-10 16:43:41 +00:00
2022-05-11 21:33:48 +00:00
if __name__ == '__main__':  # only execute if script is called directly -> for simple testing

    print("this is a module and should not be run directly")
    print("fetching top news by keyword bitcoin...")

    # Try both fetch functions and print the first article of each.
    for fetch in (get_all_news_by_keyword, get_top_news_by_keyword):
        result = fetch("bitcoin")
        # The fetch functions return None when the status is not "ok", and the
        # article list may be empty; guard both instead of failing on indexing.
        found = (result.get("articles") or []) if result else []
        if found:
            print(format_article(found[0]))
        else:
            print(f"no articles returned by {fetch.__name__}")

    # non-zero exit code kept from the original: direct execution is not the intended use
    sys.exit(1)