2022-03-15 08:05:28 +00:00
|
|
|
"""
|
|
|
|
script for news fetching (by keywords)
|
|
|
|
"""
|
|
|
|
__author__ = "Florian Kellermann, Linus Eickhoff"
|
|
|
|
__date__ = "15.03.2022"
|
|
|
|
__version__ = "0.0.1"
|
2022-03-15 09:39:29 +00:00
|
|
|
__license__ = "None"
|
|
|
|
|
|
|
|
import sys
|
2022-03-15 13:01:33 +00:00
|
|
|
import os
|
2022-03-15 12:54:08 +00:00
|
|
|
import json
|
2022-03-15 13:01:33 +00:00
|
|
|
|
2022-03-15 09:39:29 +00:00
|
|
|
import pandas as pd
|
|
|
|
|
2022-03-15 13:01:33 +00:00
|
|
|
from newsapi import NewsApiClient
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
# Load environment variables (python-dotenv) before the API key is read below.
load_dotenv()

# Shared NewsAPI client; the key is read from the NEWS_API_KEY environment
# variable, so api_key is None when that variable is not set.
newsapi = NewsApiClient(api_key=os.getenv('NEWS_API_KEY'))

# NOTE: the three requests below run at import time, not only when the file is
# executed as a script.

# Sample request: /v2/top-headlines
top_headlines = newsapi.get_top_headlines(
    q='bitcoin',
    sources='bbc-news,the-verge',
    language='en',
)

# Sample request: /v2/everything (the date window is hard-coded)
all_articles = newsapi.get_everything(
    q='bitcoin',
    sources='bbc-news,the-verge',
    domains='bbc.co.uk,techcrunch.com',
    from_param='2022-03-14',
    to='2022-03-15',
    language='en',
    sort_by='relevancy',
    page=2,
)

# Sample request: /v2/top-headlines/sources
sources = newsapi.get_sources()
|
|
|
|
|
2022-03-15 12:54:08 +00:00
|
|
|
def get_top_news_by_keyword(keyword):
    """Fetch top headlines matching *keyword* and dump them to disk.

    The request is limited to the sources ``bbc-news``, ``the-verge`` and
    ``cnn`` and to English-language results. The raw response is written as
    JSON to ``top_headline.json`` in the current working directory,
    overwriting any previous file.

    Args:
        keyword: Search term passed as ``q`` to the top-headlines request.

    Returns:
        The response object returned by ``newsapi.get_top_headlines``
        (presumably a dict with an ``"articles"`` list -- the script entry
        point indexes it that way).
    """
    top_headlines = newsapi.get_top_headlines(
        q=keyword,
        sources='bbc-news,the-verge,cnn',
        language='en',
    )

    # The context manager guarantees the file is flushed and closed, even if
    # serialisation fails part-way through.
    with open("top_headline.json", "w", encoding="utf-8") as out_file:
        json.dump(top_headlines, out_file)

    return top_headlines
|
|
|
|
|
|
|
|
def format_article(article):
    """Render one article as a short HTML snippet.

    The snippet consists of the source name in italics, the headline on the
    next line, a blank line, and a link labelled ``text`` pointing at the
    article URL.

    Args:
        article: Mapping with the keys ``"source"`` (itself a mapping with a
            ``"name"`` key), ``"title"`` and ``"url"``.

    Returns:
        The formatted HTML string.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Local import keeps this function self-contained.
    import html

    def _escape(value):
        # The values come from an external API; escape them so characters such
        # as &, <, > and quotes cannot break or inject markup. str() keeps
        # non-string values (e.g. None) printable, as the f-string did before.
        return html.escape(str(value), quote=True)

    sourcename = _escape(article["source"]["name"])
    headline = _escape(article["title"])
    url = _escape(article["url"])

    return f"<i>{sourcename}</i>\n{headline}\n\n<a href=\"{url}\">text</a>"
|
|
|
|
|
2022-03-15 09:39:29 +00:00
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fetch headlines for "business" and print the first
    # one in formatted form.
    print("fetching top news by keyword business...")

    articles = get_top_news_by_keyword("business")

    # The API may return no matches; guard so the script does not crash with
    # an IndexError on an empty result list.
    found = articles.get("articles") or []
    if found:
        formatted_article = format_article(found[0])
        print(formatted_article)
    else:
        print("no articles found for keyword business")
|