-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathselenium_scrapping.py
More file actions
66 lines (54 loc) · 2.83 KB
/
selenium_scrapping.py
File metadata and controls
66 lines (54 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Scrapes mobile-phone listings from Flipkart (reached via a DuckDuckGo search)
# and saves product names and prices to an Excel file.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import pandas as pd
# ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Launch the Chrome webdriver and open DuckDuckGo.
driver = webdriver.Chrome()
driver.get("https://duckduckgo.com/")
time.sleep(3)  # NOTE(review): fixed sleeps are brittle; WebDriverWait with expected_conditions would be more reliable
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Search bar: look up "Flipkart".
elem = driver.find_element(By.XPATH, '//*[@id="searchbox_input"]')
elem.send_keys('Flipkart')
elem.send_keys(Keys.ENTER)
time.sleep(3)
# --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Click the first search result to reach the Flipkart site.
# FIX: .click() always returns None, so the original `elem = ...` assignment was
# meaningless and has been dropped. The long positional XPath will break whenever
# DuckDuckGo changes its result markup — TODO: use a more stable locator.
driver.find_element(By.XPATH, '//*[@id="r1-0"]/div[2]/div/div/a/div/p/span').click()
time.sleep(3)
# --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Flipkart search bar: look up "Mobiles".
elem = driver.find_element(
    By.XPATH,
    '//*[@id="container"]/div/div[1]/div/div/div/div/div[1]/div/div/div/div[1]/div[1]/header/div[1]/div[2]/form/div/div/input',
)
elem.send_keys('Mobiles')
elem.send_keys(Keys.ENTER)
time.sleep(6)  # longer wait: results page must finish rendering before scraping
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Scraping part.
products = driver.find_elements(By.CLASS_NAME, 'KzDlHZ')  # product-title elements — confirm class against the live site
# BUG FIX: the original called find_elements(By.TAG_NAME, '.yRaY8j ZYYwLA').
# By.TAG_NAME matches HTML tag names, so a CSS-class string can never match and
# prices was always empty. A compound class selector needs By.CSS_SELECTOR with
# the classes dot-joined (no space — a space means "descendant" in CSS).
prices = driver.find_elements(By.CSS_SELECTOR, '.yRaY8j.ZYYwLA')  # price elements — confirm classes against the live site
# Extract the visible text from the located elements.
product_names = [product.text for product in products]
product_prices = [price.text for price in prices]
# Debugging output: check the length and contents.
print(f"Length of product_names: {len(product_names)}")
print(f"Length of product_prices: {len(product_prices)}")
# Check the data.
print("Product Names:", product_names)
print("Product Prices:", product_prices)
# Pad the shorter list with None so both columns have the same length
# (pandas requires equal-length columns when building a DataFrame from a dict).
max_length = max(len(product_names), len(product_prices))
product_names.extend([None] * (max_length - len(product_names)))
product_prices.extend([None] * (max_length - len(product_prices)))
# Combine the data into a dictionary.
data = {
    'Product Name': product_names,
    'Price': product_prices
}
# Create the DataFrame.
df = pd.DataFrame(data)
# Save to Excel (requires an Excel engine such as openpyxl to be installed).
df.to_excel("scraped_data.xlsx", index=False)
print("Data has been saved to 'scraped_data.xlsx'")