"""
script for news fetching (by keywords)
"""
__author__ = "Florian Kellermann, Linus Eickhoff"
__date__ = "15.03.2022"
__version__ = "0.0.1"
__license__ = "None"

import sys
import os
import json
import requests
import datetime as dt

from newsapi import NewsApiClient
from dotenv import load_dotenv

load_dotenv()  # loads environment vars

# Init
api_key = os.getenv('NEWS_API_KEY')  # get API Key from .env file
newsapi = NewsApiClient(api_key=api_key)  # news api from https://newsapi.org/

try:
    # get all available news sources (e.g BBC, New York Times, etc.)
    # The query is passed via `params` so the values are URL-encoded, and a
    # timeout keeps the import from hanging forever on a stalled connection.
    source_json = requests.get(
        "https://newsapi.org/v2/top-headlines/sources",
        params={"apiKey": api_key, "language": "en"},
        timeout=10,
    ).json()
    sources = source_json["sources"]
    str_sources = ",".join([source["id"] for source in sources])
except (KeyError, ValueError, requests.RequestException):
    # KeyError: response has no "sources" entry (e.g. an error payload)
    # ValueError: response body is not valid JSON
    # RequestException: connection problem, timeout, ...
    print("Error: Could not get sources")
    sys.exit(1)


def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """get all news to keyword

    Args:
        keyword (String): keyword for search
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles, or None if the response
            status is not "ok"
    """
    # keywords can be combined with OR (e.g. keyword = "bitcoin OR ethereum")
    all_news = newsapi.get_everything(
        q=keyword,
        sources=str_sources,
        language='en',
        from_param=from_date,
    )
    if all_news["status"] == "ok":
        return all_news
    return None


def get_top_news_by_keyword(keyword):
    """get top news to keyword

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles, or None if the response
            status is not "ok"
    """
    # get top headlines, measured by popularity from NewsApi
    top_headlines = newsapi.get_top_headlines(
        q=keyword,
        sources=str_sources,
        language='en',
    )
    if top_headlines["status"] == "ok":
        return top_headlines
    return None


def format_article(article):
    """format article for messaging (using markdown syntax)

    Args:
        article (dict): article to format for messaging; must provide
            article["source"]["name"], article["title"] and article["url"]

    Returns:
        String: formatted article
    """
    sourcename = article["source"]["name"]
    headline = article["title"]
    url = article["url"]
    # formatting in Markdown syntax: italic source, bold headline, then link
    return f"_{sourcename}_\n*{headline}*\n\n{url}"


if __name__ == '__main__':
    # only execute if script is called directly -> for simple testing
    print("this is a module and should not be run directly")
    print("fetching top news by keyword bitcoin...")

    for fetch_news in (get_all_news_by_keyword, get_top_news_by_keyword):
        articles = fetch_news("bitcoin")
        # The fetchers return None on a non-"ok" status and the article list
        # may be empty, so guard before indexing the first entry.
        if articles and articles.get("articles"):
            formatted_article = format_article(articles["articles"][0])
            print(formatted_article)
        else:
            print("no articles found")

    # NOTE(review): exit code 1 is kept from the original; presumably it
    # signals that the module is not meant to be run directly - confirm.
    sys.exit(1)