-
Notifications
You must be signed in to change notification settings - Fork 16
/
scrapping.py
47 lines (29 loc) · 1.4 KB
/
scrapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import math
# Scrape the name and price of every product in the KaBuM! gaming-chair
# listing, page by page, and write the result to a ';'-separated CSV file.

url = 'https://www.kabum.com.br/cadeiras/cadeiras-gamer'
# Browser-like User-Agent sent with every request. The value is kept exactly
# as it was (including its unusual spacing) so the server sees the same header.
headers = {'User-Agent': (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome / 86.0.4240.198Safari / 537.36"
)}

# Items requested per listing page; used both for the page-count maths and
# for the `page_size` query parameter so the two can never disagree.
PAGE_SIZE = 20
# Seconds to wait for the server before giving up. Without a timeout
# `requests` may block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Class-name patterns are loop-invariant, so compile them once. A compiled
# pattern passed as `class_` is a substring search against each CSS class.
product_card_class = re.compile('productCard')
name_card_class = re.compile('nameCard')
price_card_class = re.compile('priceCard')

# --- First request: find out how many items the listing contains. ---
site = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
site.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
soup = BeautifulSoup(site.content, 'html.parser')

listing_count = soup.find('div', id='listingCount')
if listing_count is None:
    raise RuntimeError(
        f"Could not find the 'listingCount' element on {url}; "
        "the page layout may have changed or the request was blocked."
    )
qtd_itens = listing_count.get_text().strip()

# The item count is the leading number of the text. Matching digits directly
# (rather than slicing up to the first space) also works when the text has no
# space at all. '.' thousands separators are tolerated and removed.
# NOTE(review): assumes counts >= 1000 would be rendered like "1.234" - confirm.
count_match = re.match(r'\d+(?:\.\d{3})*', qtd_itens)
if count_match is None:
    raise ValueError(f"Unexpected item-count text: {qtd_itens!r}")
qtd = count_match.group().replace('.', '')
ultima_pagina = math.ceil(int(qtd) / PAGE_SIZE)

# --- Walk every listing page and collect name/price pairs. ---
dic_produtos = {'marca': [], 'preco': []}
for i in range(1, ultima_pagina + 1):
    url_pag = (
        f'{url}?page_number={i}&page_size={PAGE_SIZE}'
        '&facet_filters=&sort=most_searched'
    )
    site = requests.get(url_pag, headers=headers, timeout=REQUEST_TIMEOUT)
    site.raise_for_status()
    soup = BeautifulSoup(site.content, 'html.parser')
    produtos = soup.find_all('div', class_=product_card_class)

    for produto in produtos:
        name_tag = produto.find('span', class_=name_card_class)
        price_tag = produto.find('span', class_=price_card_class)
        if name_tag is None or price_tag is None:
            # The substring match on 'productCard' can hit divs that are not
            # complete product cards; skip them instead of aborting the whole
            # scrape with an AttributeError on None.
            continue

        marca = name_tag.get_text().strip()
        preco = price_tag.get_text().strip()
        print(marca, preco)
        dic_produtos['marca'].append(marca)
        dic_produtos['preco'].append(preco)

    # Progress indicator: the page that has just been processed.
    print(url_pag)

# --- Persist the collected data. ---
df = pd.DataFrame(dic_produtos)
# NOTE(review): 'seu/path/...' looks like a placeholder output location; the
# directory must already exist or `to_csv` will raise - confirm before running.
df.to_csv('seu/path/preco_cadeira.csv', encoding='utf-8', sep=';')