GuessThePrice/source/fetcher.py
2022-05-03 11:13:27 +02:00

146 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""script with functions for fetching product data from amazon"""
import json
import requests
from bs4 import BeautifulSoup
def fetch_url(url, timeout=10):
    """Fetch a url and return the response body as text

    Args:
        url (String): url to fetch
        timeout (Number): seconds to wait for the server before giving up;
            forwarded unchanged to ``requests.get``

    Returns:
        Text: html response body

    Raises:
        requests.exceptions.Timeout: the server did not answer within ``timeout``
        requests.exceptions.RequestException: any other failure raised by requests
    """
    headers = {
        # Desktop Firefox User-Agent; presumably needed so the site serves the
        # regular product page instead of rejecting the default client string.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'
    }
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def get_title(response):
    """Extract the product title from a product page

    Args:
        response (Text): html response to parse

    Returns:
        String: text of the first ``<meta name="title">`` content up to the
            first colon, stripped of surrounding whitespace; None when the
            page has no such meta tag

    Raises:
        KeyError: the meta tag exists but has no ``content`` attribute
    """
    document = BeautifulSoup(response, 'html.parser')
    meta_tag = document.find("meta", attrs={'name': 'title'})
    if meta_tag is None:
        return None

    # Only the part in front of the first colon is kept as the title.
    leading_part, _, _ = meta_tag["content"].partition(":")
    return leading_part.strip()
def get_image(response, title):
    """Get the url of the largest product image from response

    The first ``<img>`` whose ``alt`` text equals ``title`` is used. Its
    ``data-a-dynamic-image`` attribute is parsed as a JSON object mapping
    image urls to a list of numbers, and the url whose first number is the
    largest is returned (presumably that number is the width - TODO confirm).

    Args:
        response (Text): html response to parse
        title (String): title of product, as returned by get_title; may be None

    Returns:
        String: product image url, or None when ``title`` is None, no matching
            image exists, the image lacks the ``data-a-dynamic-image``
            attribute, or the attribute holds an empty object

    Raises:
        json.JSONDecodeError: the attribute value is not valid JSON
    """
    # get_title returns None when the page has no title; nothing can match then.
    if title is None:
        return None

    soup = BeautifulSoup(response, 'html.parser')
    # NOTE(review): the previous code compared against title.replace("&", "&"),
    # which is a no-op. If an HTML entity conversion was intended here, it was
    # lost and should be restored deliberately.
    candidates = soup.find_all("img", alt=lambda alt_text: alt_text == title)
    if not candidates:
        return None

    raw_sizes = candidates[0].get("data-a-dynamic-image")
    if raw_sizes is None:
        return None

    sizes_by_url = json.loads(raw_sizes)
    # max() keeps the first url on ties, matching a strict ">" scan.
    return max(sizes_by_url, key=lambda image_url: sizes_by_url[image_url][0], default=None)
def get_description(response):
    """Get description from response

    Collects the text of every ``<span>`` inside the first
    ``<div id="feature-bullets">`` and joins the stripped texts with newlines.

    Args:
        response (Text): html response to parse

    Returns:
        String: product description, one span per line (may be empty);
            None when the page has no ``feature-bullets`` div

    Raises:
        None: None
    """
    soup = BeautifulSoup(response, 'html.parser')
    bullets = soup.find("div", id="feature-bullets")
    if bullets is None:
        return None

    # Spans containing this text are skipped rather than treated as description.
    fit_notice = "um sicherzustellen, dass dieser Artikel passt."
    # NOTE(review): the previous code also required `"" not in item.text`.
    # The empty string is contained in every string, so that test rejected
    # every span and the function always returned "". The empty literal is
    # probably a special character that got lost; restore it here as a second
    # exclusion marker if it can be recovered.
    lines = [
        span.text.strip()
        for span in bullets.find_all("span")
        if fit_notice not in span.text
    ]
    return "\n".join(lines)
def get_price(response):
    """Read the product price from a product page

    The text of the first ``<div>`` with class
    ``twister-plus-buying-options-price-data`` is parsed as JSON and the
    first entry of the result supplies the price.

    Args:
        response (Text): html response to parse

    Returns:
        Array: ``[priceAmount, currencySymbol]`` of the first entry, or None
            when the div is missing or the parsed JSON is empty

    Raises:
        json.JSONDecodeError: the div text is not valid JSON
        KeyError: the first entry lacks ``priceAmount`` or ``currencySymbol``
    """
    parsed_page = BeautifulSoup(response, 'html.parser')
    price_nodes = parsed_page.find_all(
        "div", attrs={"class": "twister-plus-buying-options-price-data"}
    )
    if not price_nodes:
        return None

    offers = json.loads(price_nodes[0].text)
    if len(offers) == 0:
        return None

    amount = offers[0]["priceAmount"]
    symbol = offers[0]["currencySymbol"]
    return [amount, symbol]
if __name__ == "__main__":
    # Manual smoke test: fetch one hard-coded product page and print every
    # extracted field.
    prod_src = fetch_url('https://www.amazon.de/dp/B082QDB6CG')

    # Parse the title once; it is both printed and used to locate the image.
    title = get_title(prod_src)
    # Every getter may return None, so values go through str() instead of
    # being concatenated directly (which raises TypeError on None).
    image = get_image(prod_src, title) if title is not None else None

    print("Title: " + str(title) + "\n")
    print("Image: " + str(image) + "\n")
    print("Price: " + str(get_price(prod_src)) + "\n")
    print("Description: " + str(get_description(prod_src)))