95 lines
3.2 KiB
Python
95 lines
3.2 KiB
Python
"""
|
|
script for news fetching (by keywords)
|
|
"""
|
|
__author__ = "Florian Kellermann, Linus Eickhoff"
|
|
__date__ = "26.04.2022"
|
|
__version__ = "1.0.0"
|
|
__license__ = "None"
|
|
|
|
import os
|
|
import sys
|
|
|
|
import helper_functions as hf
|
|
import requests
|
|
from dotenv import load_dotenv
|
|
from newsapi import NewsApiClient
|
|
|
|
load_dotenv()  # loads environment vars

# Init
api_key = os.getenv('NEWS_API_KEY')  # get API Key from .env file
newsapi = NewsApiClient(api_key=api_key)  # news api from https://newsapi.org/

# Build the comma-separated list of source ids that the fetch functions pass
# to the API. The list is requested live once at import time; if that fails
# for any reason, a hard-coded list is used so the module can still be imported.
try:
    # get all available news sources (e.g BBC, New York Times, etc.)
    source_json = requests.get(
        "https://newsapi.org/v2/top-headlines/sources",
        params={"apiKey": api_key, "language": "en"},
        timeout=10,  # an unresponsive server must not hang the import forever
    ).json()
    sources = source_json["sources"]
    str_sources = ",".join(source["id"] for source in sources)
except (KeyError, ValueError, requests.RequestException):
    # KeyError: the response carries no "sources" entry (e.g. an error reply)
    # ValueError: the response body is not valid JSON
    # RequestException: connection problem, timeout or similar transport failure
    print("Error: Could not get sources, may be blocked because of too many requests (free newsapi is limited to 100 reqs per day)")
    # NOTE(review): the entries below are separated by ", " (with a space) while
    # the live list above is joined with "," -- confirm the API accepts both.
    str_sources = "Reuters, bbc, cnn, fox-news, google-news, hacker-news, nytimes, the-huffington-post, the-new-york-times, business-insider, bbc-news, cbc-news, ESPN, fox-sports, google-news-uk, independent, the-wall-street-journal, the-washington-times, time, usa-today"
|
|
|
|
|
|
def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """search all articles for a keyword

    Exceptions raised by the client call are not handled here.

    Args:
        keyword (String): search term; terms can be combined with OR
            (e.g. keyword = "bitcoin OR ethereum")
        from_date (String): earliest date to include in the search

    Returns:
        JSON/dict: the response dict containing the articles if its
            "status" entry is "ok", otherwise None
    """
    response = newsapi.get_everything(
        q=keyword,
        sources=str_sources,
        language='en',
        from_param=from_date,
    )
    # only hand back responses whose status entry is "ok"
    return response if response["status"] == "ok" else None
|
|
|
|
|
|
def get_top_news_by_keyword(keyword):
    """fetch the top headlines for a keyword

    Exceptions raised by the client call are not handled here.

    Args:
        keyword (String): search term

    Returns:
        JSON/dict: the response dict containing the articles if its
            "status" entry is "ok", otherwise None
    """
    # top headlines, measured by popularity from NewsApi
    response = newsapi.get_top_headlines(
        q=keyword,
        sources=str_sources,
        language='en',
    )
    if response["status"] != "ok":
        return None
    return response
|
|
|
|
|
|
def format_article(article):
    """build the message text for one article (markdown syntax)

    Args:
        article (dict): article to format; its source name, "title" and
            "url" entries are read

    Returns:
        String: source name in italics, headline in bold, a blank line
            and the url
    """
    escape = hf.make_markdown_proof  # makes a value markdownv2 proof

    source_name = escape(article["source"]["name"])
    title = escape(article["title"])
    link = escape(article["url"])

    # italic source, bold headline, blank line, then the link
    return f"_{source_name}_\n*{title}*\n\n{link}"
|
|
|
|
|
|
if __name__ == '__main__':  # only execute if script is called directly -> for simple testing

    print("this is a module and should not be run directly")
    print("fetching top news by keyword bitcoin...")

    # Smoke-test both fetchers in the original order: all news first, then
    # top headlines. Either one can return None (status not "ok") or a
    # response without any articles, so guard before indexing the first hit.
    for fetch in (get_all_news_by_keyword, get_top_news_by_keyword):
        response = fetch("bitcoin")
        found = (response or {}).get("articles") or []
        if found:
            print(format_article(found[0]))
        else:
            print(f"no articles returned by {fetch.__name__}")

    # Exit status 1 is kept as-is; presumably it marks direct execution as
    # unintended use of this module.
    sys.exit(1)
|