2022-05-12 11:30:56 +00:00
<!doctype html>
<html lang="en">
<head>
<!-- Document metadata: charset first, then viewport, generator, title and description. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1">
<meta name="generator" content="pdoc 0.10.0">
<title>telegram_bot.news.news_fetcher API documentation</title>
<meta name="description" content="script for news fetching (by keywords)">
2022-09-18 11:05:35 +00:00
<!-- sanitize.css reset and typography from cdnjs, pinned with SRI hashes. -->
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/sanitize.min.css" integrity="sha512-y1dtMcuvtTMJc1yPgEqF0ZjQbhnc/bFhyvIyVNb9Zk5mIGtqVaAB1Ttl28su8AvFMOY0EwRbAe+HCLqj6W7/KA==" crossorigin>
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/13.0.0/typography.min.css" integrity="sha512-Y1DYSb995BAfxobCkKepB1BqJJTPrOp3zPL74AWFugHHmmdcvO+C48WLrUOlhGMc0QG7AE3f7gmvvcrmX2fDoA==" crossorigin>
2022-09-17 22:58:59 +00:00
<!-- highlight.js "github" theme for the source listings. -->
<!-- NOTE(review): this third-party stylesheet carries no integrity hash; add the SRI value published by cdnjs for 11.6.0. -->
<link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.6.0/styles/github.min.css" crossorigin>
2022-05-12 11:30:56 +00:00
<style>
/* Base page styles; one rule per line. */
:root { --highlight-color: #fe9 }
.flex { display: flex !important }
body { line-height: 1.5em }
#content { padding: 20px }
#sidebar { padding: 30px; overflow: hidden }
#sidebar > *:last-child { margin-bottom: 2cm }
.http-server-breadcrumbs { font-size: 130%; margin: 0 0 15px 0 }
#footer { font-size: .75em; padding: 5px 30px; border-top: 1px solid #ddd; text-align: right }
#footer p { margin: 0 0 0 1em; display: inline-block }
#footer p:last-child { margin-right: 30px }
h1, h2, h3, h4, h5 { font-weight: 300 }
h1 { font-size: 2.5em; line-height: 1.1em }
h2 { font-size: 1.75em; margin: 1em 0 .50em 0 }
h3 { font-size: 1.4em; margin: 25px 0 10px 0 }
h4 { margin: 0; font-size: 105% }
h1:target, h2:target, h3:target, h4:target, h5:target, h6:target { background: var(--highlight-color); padding: .2em 0 }
a { color: #058; text-decoration: none; transition: color .3s ease-in-out }
a:hover { color: #e82 }
.title code { font-weight: bold }
h2[id^="header-"] { margin-top: 2em }
.ident { color: #900 }
pre code { background: #f8f8f8; font-size: .8em; line-height: 1.4em }
code { background: #f2f2f1; padding: 1px 4px; overflow-wrap: break-word }
h1 code { background: transparent }
pre { background: #f8f8f8; border: 0; border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; margin: 1em 0; padding: 1ex }
#http-server-module-list { display: flex; flex-flow: column }
#http-server-module-list div { display: flex }
#http-server-module-list dt { min-width: 10% }
#http-server-module-list p { margin-top: 0 }
.toc ul, #index { list-style-type: none; margin: 0; padding: 0 }
#index code { background: transparent }
#index h3 { border-bottom: 1px solid #ddd }
#index ul { padding: 0 }
#index h4 { margin-top: .6em; font-weight: bold }
@media (min-width: 200ex) { #index .two-column { column-count: 2 } }
@media (min-width: 300ex) { #index .two-column { column-count: 3 } }
dl { margin-bottom: 2em }
dl dl:last-child { margin-bottom: 4em }
dd { margin: 0 0 1em 3em }
#header-classes + dl > dd { margin-bottom: 3em }
dd dd { margin-left: 2em }
dd p { margin: 10px 0 }
.name { background: #eee; font-weight: bold; font-size: .85em; padding: 5px 10px; display: inline-block; min-width: 40% }
.name:hover { background: #e0e0e0 }
dt:target .name { background: var(--highlight-color) }
.name > span:first-child { white-space: nowrap }
.name.class > span:nth-child(2) { margin-left: .4em }
.inherited { color: #999; border-left: 5px solid #eee; padding-left: 1em }
.inheritance em { font-style: normal; font-weight: bold }
.desc h2 { font-weight: 400; font-size: 1.25em }
.desc h3 { font-size: 1em }
.desc dt code { background: inherit }
.source summary, .git-link-div { color: #666; text-align: right; font-weight: 400; font-size: .8em; text-transform: uppercase }
.source summary > * { white-space: nowrap; cursor: pointer }
.git-link { color: inherit; margin-left: 1em }
.source pre { max-height: 500px; overflow: auto; margin: 0 }
.source pre code { font-size: 12px; overflow: visible }
.hlist { list-style: none }
.hlist li { display: inline }
.hlist li:after { content: ',\2002' }
.hlist li:last-child:after { content: none }
.hlist .hlist { display: inline; padding-left: 1em }
img { max-width: 100% }
td { padding: 0 .5em }
.admonition { padding: .1em .5em; margin-bottom: 1em }
.admonition-title { font-weight: bold }
.admonition.note, .admonition.info, .admonition.important { background: #aef }
.admonition.todo, .admonition.versionadded, .admonition.tip, .admonition.hint { background: #dfd }
.admonition.warning, .admonition.versionchanged, .admonition.deprecated { background: #fd4 }
.admonition.error, .admonition.danger, .admonition.caution { background: lightpink }
</style>
<style media="screen and (min-width: 700px)">
/* Wide-screen layout: sticky sidebar next to the content column. */
@media screen and (min-width: 700px) {
  #sidebar { width: 30%; height: 100vh; overflow: auto; position: sticky; top: 0 }
  #content { width: 70%; max-width: 100ch; padding: 3em 4em; border-left: 1px solid #ddd }
  pre code { font-size: 1em }
  .item .name { font-size: 1em }
  /* row-reverse puts the sidebar, which follows the article in the markup, on the left. */
  main { display: flex; flex-direction: row-reverse; justify-content: flex-end }
  .toc ul ul, #index ul { padding-left: 1.5em }
  .toc > ul > li { margin-top: .5em }
}
</style>
<style media="print">
/* Print layout: hide source listings and start the sidebar index on a new page. */
@media print {
  #sidebar h1 { page-break-before: always }
  .source { display: none }
}
/* Generic print normalisation: plain colours, link targets spelled out, sensible page breaks. */
@media print {
  * { background: transparent !important; color: #000 !important; box-shadow: none !important; text-shadow: none !important }
  a[href]:after { content: " (" attr(href) ")"; font-size: 90% }
  a[href][title]:after { content: none }
  abbr[title]:after { content: " (" attr(title) ")" }
  .ir a:after, a[href^="javascript:"]:after, a[href^="#"]:after { content: "" }
  pre, blockquote { border: 1px solid #999; page-break-inside: avoid }
  thead { display: table-header-group }
  tr, img { page-break-inside: avoid }
  img { max-width: 100% !important }
  @page { margin: 0.5cm }
  p, h2, h3 { orphans: 3; widows: 3 }
  h1, h2, h3, h4, h5, h6 { page-break-after: avoid }
}
</style>
2022-09-17 22:58:59 +00:00
<!-- highlight.js library; deferred so it runs after parsing and before DOMContentLoaded. -->
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.6.0/highlight.min.js" integrity="sha512-gU7kztaQEl7SHJyraPfZLQCNnrKdaQi5ndOyt4L4UPL/FHDd/uB9Je6KDARIqwnNNE27hnqoWLBq+Kpe4iHfeQ==" crossorigin></script>
2022-05-12 11:30:56 +00:00
<!-- Highlight every <pre><code> block once the DOM is ready. highlightAll() replaces initHighlighting(), which highlight.js 11 deprecates. -->
<script>window.addEventListener('DOMContentLoaded', () => hljs.highlightAll())</script>
</head>
<body>
<main>
<!-- Primary content column: module title followed by the module docstring. -->
<article id="content">
<header>
<h1 class="title">Module <code>telegram_bot.news.news_fetcher</code></h1>
</header>
<section id="section-intro">
<p>script for news fetching (by keywords)</p>
<!-- Collapsible listing of the whole module. Whitespace inside <pre> is rendered verbatim, so the Python indentation below is significant. -->
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">"""
script for news fetching (by keywords)
"""
__author__ = "Florian Kellermann, Linus Eickhoff"
__date__ = "26.04.2022"
__version__ = "1.0.0"
__license__ = "None"

import os
import sys
import telegram_bot.helper_functions as hf
import requests
from dotenv import load_dotenv
from newsapi import NewsApiClient

load_dotenv() # loads environment vars

# Init
api_key = os.getenv('NEWS_API_KEY') # get API Key from .env file
newsapi = NewsApiClient(api_key=api_key) # news api from https://newsapi.org/

try:
    # get all available news sources (e.g BBC, New York Times, etc.)
    source_json = requests.get(f"https://newsapi.org/v2/top-headlines/sources?apiKey={api_key}&amp;language=en").json()
    sources = source_json["sources"]
    str_sources = ",".join([source["id"] for source in sources])
except KeyError:
    print("Error: Could not get sources, may be blocked because of too many requests (free newsapi is limited to 100 reqs per day)")
    str_sources = str(
        "Reuters, bbc, cnn, fox-news, google-news, hacker-news, nytimes, the-huffington-post, the-new-york-times, business-insider, bbc-news, cbc-news, ESPN, fox-sports, google-news-uk, independent, the-wall-street-journal, the-washington-times, time, usa-today")


def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """get all news to keyword

    Args:
        keyword (String): keyword for search
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles
    """
    top_headlines = newsapi.get_everything(q=keyword, sources=str_sources, language='en', from_param=from_date) # keywords can be combined with OR (e.g. keyword = "bitcoin OR ethereum")
    if (top_headlines["status"] == "ok"):
        return top_headlines
    else:
        return None


def get_top_news_by_keyword(keyword):
    """get top news to keyword

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles
    """
    top_headlines = newsapi.get_top_headlines(q=keyword, sources=str_sources, language='en') # get top headlines, measured by popularity from NewsApi
    if (top_headlines["status"] == "ok"):
        return top_headlines
    else:
        return None


def format_article(article):
    """format article for messaging (using markdown syntax)

    Args:
        article (dict): article to format for messaging

    Returns:
        String: formatted article
    """
    sourcename = hf.make_markdown_proof(article["source"]["name"]) # make attributes markdownv2 proof
    headline = hf.make_markdown_proof(article["title"])
    url = hf.make_markdown_proof(article["url"])
    formatted_article = f"_{sourcename}_\n*{headline}*\n\n{url}" # formatting in Markdown syntax
    return formatted_article


if __name__ == '__main__': # only execute if script is called directly -&gt; for simple testing
    print("this is a module and should not be run directly")
    print("fetching top news by keyword bitcoin...")
    articles = get_all_news_by_keyword("bitcoin")
    formatted_article = format_article(articles["articles"][0])
    print(formatted_article)
    articles = get_top_news_by_keyword("bitcoin")
    formatted_article = format_article(articles["articles"][0])
    print(formatted_article)
    sys.exit(1)</code></pre>
</details>
</section>
<!-- NOTE(review): the two empty sections below look like generator placeholders for categories this module does not have; confirm before removing. -->
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-functions">Functions</h2>
<dl>
<!-- format_article: signature, rendered docstring and collapsible source. Heading ids are prefixed with the function name so they stay unique in the document. -->
<dt id="telegram_bot.news.news_fetcher.format_article"><code class="name flex">
<span>def <span class="ident">format_article</span></span>(<span>article)</span>
</code></dt>
<dd>
<div class="desc"><p>format article for messaging (using markdown syntax)</p>
<h2 id="format_article-args">Args</h2>
<dl>
<dt><strong><code>article</code></strong> :&ensp;<code>dict</code></dt>
<dd>article to format for messaging</dd>
</dl>
<h2 id="format_article-returns">Returns</h2>
<dl>
<dt><code>String</code></dt>
<dd>formatted article</dd>
</dl></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def format_article(article):
    """format article for messaging (using markdown syntax)

    Args:
        article (dict): article to format for messaging

    Returns:
        String: formatted article
    """
    sourcename = hf.make_markdown_proof(article["source"]["name"]) # make attributes markdownv2 proof
    headline = hf.make_markdown_proof(article["title"])
    url = hf.make_markdown_proof(article["url"])
    formatted_article = f"_{sourcename}_\n*{headline}*\n\n{url}" # formatting in Markdown syntax
    return formatted_article</code></pre>
</details>
</dd>
<!-- get_all_news_by_keyword: signature, rendered docstring and collapsible source. Heading ids are prefixed with the function name so they stay unique in the document. -->
<dt id="telegram_bot.news.news_fetcher.get_all_news_by_keyword"><code class="name flex">
<span>def <span class="ident">get_all_news_by_keyword</span></span>(<span>keyword, from_date='2000-01-01')</span>
</code></dt>
<dd>
<div class="desc"><p>get all news to keyword</p>
<h2 id="get_all_news_by_keyword-args">Args</h2>
<dl>
<dt><strong><code>keyword</code></strong> :&ensp;<code>String</code></dt>
<dd>keyword for search</dd>
<dt><strong><code>from_date</code></strong> :&ensp;<code>String</code></dt>
<dd>min date for search</dd>
</dl>
<h2 id="get_all_news_by_keyword-returns">Returns</h2>
<p>JSON/dict: dict containing articles</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_all_news_by_keyword(keyword, from_date="2000-01-01"):
    """get all news to keyword

    Args:
        keyword (String): keyword for search
        from_date (String): min date for search

    Returns:
        JSON/dict: dict containing articles
    """
    top_headlines = newsapi.get_everything(q=keyword, sources=str_sources, language='en', from_param=from_date) # keywords can be combined with OR (e.g. keyword = "bitcoin OR ethereum")
    if (top_headlines["status"] == "ok"):
        return top_headlines
    else:
        return None</code></pre>
</details>
</dd>
<!-- get_top_news_by_keyword: signature, rendered docstring and collapsible source. Heading ids are prefixed with the function name so they stay unique in the document. -->
<dt id="telegram_bot.news.news_fetcher.get_top_news_by_keyword"><code class="name flex">
<span>def <span class="ident">get_top_news_by_keyword</span></span>(<span>keyword)</span>
</code></dt>
<dd>
<div class="desc"><p>get top news to keyword</p>
<h2 id="get_top_news_by_keyword-args">Args</h2>
<dl>
<dt><strong><code>keyword</code></strong> :&ensp;<code>String</code></dt>
<dd>keyword for search</dd>
</dl>
<h2 id="get_top_news_by_keyword-returns">Returns</h2>
<p>JSON/dict: dict containing articles</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def get_top_news_by_keyword(keyword):
    """get top news to keyword

    Args:
        keyword (String): keyword for search

    Returns:
        JSON/dict: dict containing articles
    """
    top_headlines = newsapi.get_top_headlines(q=keyword, sources=str_sources, language='en') # get top headlines, measured by popularity from NewsApi
    if (top_headlines["status"] == "ok"):
        return top_headlines
    else:
        return None</code></pre>
</details>
</dd>
</dl>
</section>
<!-- NOTE(review): empty trailing section, presumably a generator placeholder; confirm before removing. -->
<section>
</section>
</article>
<!-- Sidebar index: link to the parent package, then in-page anchors for each documented function. -->
<nav id="sidebar">
  <h1>Index</h1>
  <div class="toc">
    <ul></ul>
  </div>
  <ul id="index">
    <li><h3>Super-module</h3>
      <ul>
        <li><code><a title="telegram_bot.news" href="index.html">telegram_bot.news</a></code></li>
      </ul>
    </li>
    <li><h3><a href="#header-functions">Functions</a></h3>
      <ul>
        <li><code><a title="telegram_bot.news.news_fetcher.format_article" href="#telegram_bot.news.news_fetcher.format_article">format_article</a></code></li>
        <li><code><a title="telegram_bot.news.news_fetcher.get_all_news_by_keyword" href="#telegram_bot.news.news_fetcher.get_all_news_by_keyword">get_all_news_by_keyword</a></code></li>
        <li><code><a title="telegram_bot.news.news_fetcher.get_top_news_by_keyword" href="#telegram_bot.news.news_fetcher.get_top_news_by_keyword">get_top_news_by_keyword</a></code></li>
      </ul>
    </li>
  </ul>
</nav>
</main>
<!-- Page footer crediting the documentation generator. -->
<footer id="footer">
  <p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
</footer>
</body>
</html>