-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathwebscrapt2
More file actions
43 lines (33 loc) · 1 KB
/
webscrapt2
File metadata and controls
43 lines (33 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from requests_html import HTMLSession
from bs4 import BeautifulSoup
# Shared rendering session, reused by get_data() for every request.
s = HTMLSession()

# Term to search for on Amazon.es.
searchterm = 'jamon'

# Search-results URL for the term; the marketplace and referrer query
# parameters were copied verbatim from a browser search.
url = f'https://www.amazon.es/s?k={searchterm}&__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&ref=nb_sb_noss'
def get_data(url):
    """Fetch *url* with the shared session, render its JavaScript, and
    return the page parsed as a BeautifulSoup document.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status, instead of silently
        parsing an error/captcha page.
    """
    r = s.get(url)
    r.raise_for_status()  # fail loudly on HTTP errors
    # Amazon builds the result grid client-side; render JS before parsing.
    r.html.render(timeout=20)
    soup = BeautifulSoup(r.html.html, 'html.parser')
    return soup
def get_object(soup):
    """Extract product records from a parsed Amazon search-results page.

    Parameters
    ----------
    soup : BeautifulSoup
        Document as returned by ``get_data``.

    Returns
    -------
    list of dict
        One dict per result card with keys ``title`` (first 25 chars of the
        product title), ``link`` (absolute URL), ``price`` (raw price text,
        or 0 when absent — original contract kept) and ``reviews`` (int,
        0 when absent or non-numeric).
    """
    products = soup.find_all('div', {'data-component-type': 's-search-result'})
    lista_diccionarios = []
    for product in products:
        # Title and link live in the same anchor; look it up once instead
        # of twice, and skip cards (e.g. sponsored slots) that lack it
        # rather than crashing on `.text` of None.
        anchor = product.find('a', {'class': 'a-link-normal a-text-normal'})
        if anchor is None:
            continue
        title = anchor.text[:25]
        link = 'https://www.amazon.es' + anchor['href']

        # Price: raw text when present, 0 when the span is missing.
        price_tag = product.find('span', {'class': 'a-price-whole'})
        price = price_tag.text if price_tag is not None else 0

        # Reviews: narrow except — AttributeError when the span is missing,
        # ValueError when its text is not a plain integer. A bare except
        # here would also swallow KeyboardInterrupt/SystemExit.
        try:
            reviews = int(product.find('span', {'class': 'a-size-base'}).text)
        except (AttributeError, ValueError):
            reviews = 0

        almacenador = {
            'title': title,
            'link': link,
            'price': price,
            'reviews': reviews
        }
        lista_diccionarios.append(almacenador)
    return lista_diccionarios
print(get_object(get_data(url)))