Python Forum
Size scraping issue - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Web Scraping & Web Development (https://python-forum.io/forum-13.html)
+--- Thread: Size scraping issue (/thread-41574.html)



Size scraping issue - scrapemasta - Feb-09-2024

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import pandas as pd
from xlwt import Workbook
from bs4 import BeautifulSoup
import time

wb = Workbook()  # creating Workbook in an excel sheet to store data
ws = wb.add_sheet('Sheet1')

# Defining the Headers in the Excel Worksheet
ws.write(0, 0, 'Name')
ws.write(0, 1, 'Size')
ws.write(0, 2, 'Price')
row = 1

# Reading the file where the links of products are saved
df = pd.read_excel('AeylaBlankets_Links.xlsx')
driver = webdriver.Firefox()

# Driver Getting the Links to open them on web browsers
for i in range(len(df['Links'])):
    driver.get(df['Links'][i])
    time.sleep(5)

    # Parsing the source code of the webpage
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Name of the Product
    name = soup.find('h1').text.strip()
    print(name)

    while True:
        try:
            # Finding the main class that contains all the sizes
            sizes_dropdown = driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/div[2]/div[1]/div[4]/div[1]/form[1]/div[1]/div[2]/div[1]/div[1]/div')
            sizes_options = sizes_dropdown.find_elements(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/div[2]/div[1]/div[4]/div[1]/form[1]/div[1]/div[2]/div[1]/div[1]/div[1]')

            if not sizes_options:
                break

            # Using for loop to find all of the sizes one by one
            for size_option in sizes_options:
                size = size_option.text.strip()  # Strip extra whitespace

                # Click the option to select it
                size_option.click()
                time.sleep(3)

                # Finding the Class of Prize
                prize = driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/div[2]/div[1]/div[4]/div[1]/div[3]/div[1]/span[2]').text
                print(size)
                print(prize)

                # Saving the name, size, and price of the product in the worksheet
                ws.write(row, 0, name)
                ws.write(row, 1, size)
                ws.write(row, 2, prize)
                row = row + 1

        except Exception as e:
            print(f"Error: {e}")

# Save the Workbook with its name and type
wb.save('AeylaBlankets_Detail.xls')

# Close the webdriver
driver.quit()
This is the URL I am trying to scrape: https://www.aeyla.co.uk/products/mela-weighted-blanket?variant=44467967328542

I want to click on each size from the dropdown and get the price for it. However, my script at the moment only clicks on the dropdown and does not iterate through the rest of sizes. Can someone help?