Script does not work on Linux server - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Web Scraping & Web Development (https://python-forum.io/forum-13.html)
+--- Thread: Script does not work on Linux server (/thread-41811.html)
Script does not work on Linux server - scrapemasta - Mar-22-2024

Hello there,

First of all, I am not an expert in Python; I have rather learned it ad hoc. I have a script that scrapes the name, the sizes, and the price for each size of a product. I create an Excel file with the product links; the script reads it, opens the links, gets the data, and saves it in a new Excel file. Now, the issue is that when run on the server, the script is not able to click the cookie popup that shows up and accept the cookies; when I run it on my PC, the script works fine. Here is the code:

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
from bs4 import BeautifulSoup
from xlwt import Workbook
import pandas as pd
from selenium.webdriver.chrome.options import Options
# Set up Chrome options for headless browsing
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox") # Add this line if running as root user
# Initialize WebDriver
driver = webdriver.Chrome(options=chrome_options)
# Reading the file where the links of products are saved
df = pd.read_excel('OttyDuvets_Links.xlsx')
# Create a single Workbook and Worksheet
wb = Workbook()
ws = wb.add_sheet('Sheet1')
# Defining the Headers in Excel Worksheet
ws.write(0, 0, 'Name')
ws.write(0, 1, 'Size')
ws.write(0, 2, 'Price')
row = 1
# Iterate over all product links
for i in range(len(df['Links'])):
    driver.get(df['Links'][i])
    time.sleep(5)

    # Wait for the "cc-btn-decision" class to appear and then click on it
    try:
        decision_btn = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div/div[2]/a[3]'))
        )
        decision_btn.click()
    except Exception as e:
        print("Error clicking on 'cc-btn-decision':", str(e))

    time.sleep(3)  # Adjust this delay as needed

    # Parsing the source code of the webpage
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Name of the Product
    name_element = soup.find('h1')
    name = name_element.text.strip() if name_element else "Name not found"
    print(name)

    # Find the button that triggers the dropdown
    button = driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]')
    button.click()

    # Add a delay to allow the dropdown to appear
    time.sleep(3)

    # Iterate through all size options using a for loop
    for j in range(len(driver.find_elements(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]/option'))):
        # Find the dropdown container and size options each time
        dropdown_container = driver.find_element(By.CLASS_NAME, 'select')
        size_options = dropdown_container.find_elements(By.TAG_NAME, 'option')
        option = size_options[j]

        # Get the size from the option
        selected_size = option.text.strip()
        print(selected_size)

        # Scroll into view using JavaScript
        driver.execute_script("arguments[0].scrollIntoView();", option)
        time.sleep(1)

        # Click on the size option to select it
        try:
            option.click()
        except StaleElementReferenceException:
            # If the element is stale, re-find the dropdown and the option
            dropdown_container = driver.find_element(By.CLASS_NAME, 'select')
            size_options = dropdown_container.find_elements(By.TAG_NAME, 'option')
            option = size_options[j]
            option.click()

        time.sleep(3)

        # Find all the price elements for each selected size
        price_elements = driver.find_elements(By.XPATH,
            '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/div[1]/div[1]/div[1]/span[2]')

        # Iterate through all price elements for the selected size
        for price_element in price_elements:
            price = price_element.text.strip()
            print(price)

            # Saving the name, size, and price of the product in the worksheet
            ws.write(row, 0, name)
            ws.write(row, 1, selected_size)
            ws.write(row, 2, price)
            row += 1

        # Click on the dropdown again to show the rest of the options
        driver.find_element(By.XPATH, '/html[1]/body[1]/main[1]/section[1]/section[1]/div[1]/div[2]/product-info[1]/div[3]/variant-selects[1]/div[1]/div[1]/select[1]').click()
        time.sleep(3)
# Save the single Workbook with its name and type
wb.save('OttyDuvets_Details.xls')
# Close the browser
driver.quit()

This is the error log I get:

To me, it looks like it cannot click on the dropdown element due to the overlay of the cookie popup. Can someone help? I also have a few scripts that work great on my PC but not on the server. The URL I am trying to scrape is: https://otty.com/products/luxury-microfibre-duvet-13-5-tog-double-duvet
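In case it helps to see what I mean: from what I understand, headless Chrome starts with a small default window, so the page can lay out differently than on my PC, and waiting only for the element's presence does not guarantee it can be clicked. Below is a sketch of the change I have in mind; the selector a.cc-btn-decision is a guess based on the class name in my comment, and the window size is arbitrary:

import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementClickInterceptedException, TimeoutException

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
# Headless Chrome starts with a small default window; forcing a desktop-sized
# viewport makes the layout closer to what I see on my PC.
chrome_options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(options=chrome_options)
driver.get("https://otty.com/products/luxury-microfibre-duvet-13-5-tog-double-duvet")

try:
    # Wait until the cookie button is clickable, not merely present in the DOM
    decision_btn = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "a.cc-btn-decision"))  # guessed selector
    )
    try:
        decision_btn.click()
    except ElementClickInterceptedException:
        # Fall back to a JavaScript click if something still overlays the button
        driver.execute_script("arguments[0].click();", decision_btn)
except TimeoutException:
    print("Cookie banner did not become clickable within 10 seconds")

If this still fails on the server, calling driver.save_screenshot('page.png') right before the click should show whether the banner renders differently there.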
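Also, since the size picker is a plain <select> element, would Selenium's Select helper be a more robust way to walk the options than clicking each <option>? Something like this sketch, where the CSS selector 'variant-selects select' is my guess at a shorter match for the same element my long XPath points to:

import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Assumes 'driver' is already on the product page, as in the script above.
# Collect the option labels first so later re-renders cannot make them stale.
select_locator = (By.CSS_SELECTOR, 'variant-selects select')  # guessed selector
sizes = [opt.text.strip() for opt in Select(driver.find_element(*select_locator)).options]

for size in sizes:
    # Re-find the <select> each time in case the page re-rendered it
    dropdown = Select(driver.find_element(*select_locator))
    dropdown.select_by_visible_text(size)  # fires the same change event as clicking
    time.sleep(3)  # give the price time to update, as in my script
    print(size)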