Module telegram_bot.news.news_fetcher

script for news fetching (by keywords)

Expand source code
"""
script for news fetching (by keywords)
"""
__author__ = "Florian Kellermann, Linus Eickhoff"
__date__ = "26.04.2022"
__version__ = "1.0.0"
__license__ = "None"

import os
import sys

import telegram_bot.helper_functions as hf
import requests
from dotenv import load_dotenv
from newsapi import NewsApiClient

load_dotenv()  # loads environment vars

# Init
api_key = os.getenv('NEWS_API_KEY')  # get API Key from .env file
newsapi = NewsApiClient(api_key=api_key)  # news api from https://newsapi.org/

try:
    # get all available news sources (e.g BBC, New York Times, etc.)
    # A timeout keeps a stalled connection from blocking the import of this module forever.
    source_json = requests.get(
        f"https://newsapi.org/v2/top-headlines/sources?apiKey={api_key}&language=en",
        timeout=10,
    ).json()
    sources = source_json["sources"]
    str_sources = ",".join([source["id"] for source in sources])

except (KeyError, ValueError, requests.RequestException):
    # KeyError: the response has no "sources" entry (e.g. an error payload).
    # ValueError: the response body is not valid JSON.
    # RequestException: connection problems or the timeout above.
    # In all cases fall back to a hard-coded source list instead of failing at import time.
    print("Error: Could not get sources, may be blocked because of too many requests (free newsapi is limited to 100 reqs per day)")
    # NOTE(review): these entries are separated by ", " and mix display names with ids,
    # unlike the ","-joined ids built above -- confirm the API accepts this form.
    str_sources = "Reuters, bbc, cnn, fox-news, google-news, hacker-news, nytimes, the-huffington-post, the-new-york-times, business-insider, bbc-news, cbc-news, ESPN, fox-sports, google-news-uk, independent, the-wall-street-journal, the-washington-times, time, usa-today"


def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """Search all articles for a keyword.

    Keywords can be combined with OR (e.g. keyword = "bitcoin OR ethereum").

    Args:
        keyword (String): keyword for search
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok", otherwise None
    """
    response = newsapi.get_everything(
        q=keyword,
        sources=str_sources,
        language='en',
        from_param=from_date,
    )
    return response if response["status"] == "ok" else None


def get_top_news_by_keyword(keyword):
    """Fetch the top headlines for a keyword from NewsApi.

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok", otherwise None
    """
    response = newsapi.get_top_headlines(
        q=keyword,
        sources=str_sources,
        language='en',
    )
    return response if response["status"] == "ok" else None


def format_article(article):
    """Build the message text for one article (using markdown syntax).

    Args:
        article (dict): article to format; its source name, title and url are read

    Returns:
        String: source name wrapped in underscores, headline wrapped in asterisks,
            then an empty line followed by the url
    """
    escape = hf.make_markdown_proof  # every attribute is made markdownv2 proof before use
    source_name = escape(article["source"]["name"])
    title = escape(article["title"])
    link = escape(article["url"])

    return f"_{source_name}_\n*{title}*\n\n{link}"


if __name__ == '__main__':  # only execute if script is called directly -> for simple testing

    print("this is a module and should not be run directly")
    print("fetching top news by keyword bitcoin...")

    # Try both fetchers in turn. Each may return None (status not "ok") or a
    # response without any articles, so guard before indexing the first article.
    for fetch in (get_all_news_by_keyword, get_top_news_by_keyword):
        result = fetch("bitcoin")
        found = (result.get("articles") or []) if result else []
        if found:
            print(format_article(found[0]))
        else:
            print(f"no articles returned by {fetch.__name__}")

    # Non-zero exit status is kept from the original: this file is not meant to be run as a script.
    sys.exit(1)

Functions

def format_article(article)

format article for messaging (using markdown syntax)

Args

article : dict
article to format for messaging

Returns

String
formatted article
Expand source code
def format_article(article):
    """Build the message text for one article (using markdown syntax).

    Args:
        article (dict): article to format; its source name, title and url are read

    Returns:
        String: source name wrapped in underscores, headline wrapped in asterisks,
            then an empty line followed by the url
    """
    escape = hf.make_markdown_proof  # every attribute is made markdownv2 proof before use
    source_name = escape(article["source"]["name"])
    title = escape(article["title"])
    link = escape(article["url"])

    return f"_{source_name}_\n*{title}*\n\n{link}"
def get_all_news_by_keyword(keyword, from_date='2000-01-01')

Get all news for a keyword.

Args

keyword : String
keyword for search
from_date : String
min date for search

Returns

JSON/dict: dict containing articles

Expand source code
def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """Search all articles for a keyword.

    Keywords can be combined with OR (e.g. keyword = "bitcoin OR ethereum").

    Args:
        keyword (String): keyword for search
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok", otherwise None
    """
    response = newsapi.get_everything(
        q=keyword,
        sources=str_sources,
        language='en',
        from_param=from_date,
    )
    return response if response["status"] == "ok" else None
def get_top_news_by_keyword(keyword)

Get the top news for a keyword.

Args

keyword : String
keyword for search

Returns

JSON/dict: dict containing articles

Expand source code
def get_top_news_by_keyword(keyword):
    """Fetch the top headlines for a keyword from NewsApi.

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles if the response status is "ok", otherwise None
    """
    response = newsapi.get_top_headlines(
        q=keyword,
        sources=str_sources,
        language='en',
    )
    return response if response["status"] == "ok" else None