oddspedia charts
#11
Copy the selector from the browser's Inspect tool.
img = browser.find_element(By.CSS_SELECTOR, '#__layout > div > div.wrapper__inner > div.main > div.container > div > div > main > div:nth-child(3) > div.ml__wrap > div:nth-child(6) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
Output:
https://cdn.oddspedia.com/images/teams/medium/1/3190.png
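A selector copied from Inspect like this is tied to the whole page structure, so it breaks as soon as the layout shifts. A shorter relative selector for the same image tends to survive better; a quick sketch (the shortened selector is an assumption about the markup, not tested against the page):
# First home-team logo in the match list, without anchoring to the page root
img = browser.find_element(By.CSS_SELECTOR, 'div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))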
#12
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # Add this import statement
import chromedriver_autoinstaller
import time

# Install ChromeDriver automatically
chromedriver_autoinstaller.install()

# Setup Chrome options
options = Options()
# Uncomment the next line if you want to run Chrome in headless mode
# options.add_argument("--headless")

# Create a Chrome WebDriver instance without specifying the driver executable path
browser = webdriver.Chrome(options=options)

# Navigate to the URL
url = 'https://oddspedia.com/football'
browser.get(url)

# Use sleep to wait for the page to load
time.sleep(3)


game = browser.find_element(By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div')
print(game.text)
img = browser.find_element(By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
img = browser.find_element(By.CSS_SELECTOR, '#__layout > div > div.wrapper__inner > div.main > div.container > div > div > main > div:nth-child(3) > div.ml__wrap > div:nth-child(6) > div > a > div.match-teams > div:nth-child(1) > img')
print(img.get_attribute('src'))
# Close the browser
browser.quit()
I didn't understand why it starts from that match and not from the first one in the list on the website.
I tried to iterate, with no success; I can see the names and times, but only the first images.
indices_to_check = range(1, 51)

# Loop through each index and print information
for index in indices_to_check:
    try:
        # Construct the CSS selector for the current index
        css_selector = f'main > div:nth-child(3) > div.ml__wrap > div:nth-child({index}) > div'
        
        # Find the element with the current CSS selector
        game = browser.find_element(By.CSS_SELECTOR, css_selector)
        print(game.text)

        # Find the image element within the current element
        img_css_selector = f'{css_selector} > a > div.match-teams > div:nth-child(1) > img'
        img = browser.find_element(By.CSS_SELECTOR, img_css_selector)
        print(img.get_attribute('src'))
    except Exception as e:
        print(f"Error processing index {index}: {e}")

# Close the browser
browser.quit()
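A likely cause for the missing images is lazy loading: entries outside the viewport may not get their real src until they scroll into view. A minimal sketch of the same loop (run before browser.quit(); the lazy-loading assumption is untested), scrolling each entry into view before reading the attribute:
for index in indices_to_check:
    try:
        css_selector = f'main > div:nth-child(3) > div.ml__wrap > div:nth-child({index}) > div'
        game = browser.find_element(By.CSS_SELECTOR, css_selector)
        # Scroll the entry into view so a lazy-loaded image gets its real src
        browser.execute_script("arguments[0].scrollIntoView();", game)
        time.sleep(0.2)  # short pause for the image to load
        print(game.text)
        img = browser.find_element(By.CSS_SELECTOR, f'{css_selector} > a > div.match-teams > div:nth-child(1) > img')
        print(img.get_attribute('src'))
    except Exception as e:
        print(f"Error processing index {index}: {e}")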
How can I obtain the full list of live matches and red cards?

<div class="event-red-card-indicator"><span class="event-red-card-indicator__icon">
            
        </span></div>
The strange thing is that the div looks empty when inspecting it, yet the red card appears on the website.
#13
(Dec-09-2023, 02:59 PM)nicoali Wrote: The strange thing is that the div looks empty when inspecting it, yet the red card appears on the website.
It's loaded via CSS, but you don't need that; the HTML tag's status is enough.
.event-red-card-indicator__icon {
    background: url(https://cdn.oddspedia.com/images/static/icons/red-card.svg);
}
I would load the source into BeautifulSoup; it can be easier to use when, for example, finding many tags.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
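Since the red-card icon is drawn purely by CSS, the presence of the indicator div is enough to detect it. A small sketch on top of the in_play list above (the class name is taken from the HTML snippet you posted):
for match in in_play:
    # The icon is a CSS background image; only the div's presence matters
    if match.find('div', class_='event-red-card-indicator'):
        print('Red card in:', match.get_text(strip=True))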
#14
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import chromedriver_autoinstaller
import time
import json

# Install ChromeDriver automatically
chromedriver_autoinstaller.install()

# Set up Chrome options
options = Options()
# Uncomment the next line to run Chrome in headless mode
# options.add_argument("--headless")

# Create a Chrome WebDriver instance without specifying the driver executable path
browser = webdriver.Chrome(options=options)

# Navigate to the URL
url = 'https://oddspedia.com/football'
browser.get(url)

# Use sleep to wait for the page to load
time.sleep(3)

# Use BeautifulSoup to parse the page source
soup = BeautifulSoup(browser.page_source, 'lxml')
events = soup.find_all('script', type='application/ld+json')

# Process the data as needed
for event in events:
    event_data = json.loads(event.string)
    
    # Extract the relevant information
    event_name = event_data.get('name', '')
    event_url = event_data.get('url', '')
    event_image = event_data.get('image', [])
    start_date = event_data.get('startDate', '')
    end_date = event_data.get('endDate', '')
    
    # Check whether 'hometeam' is present and not an empty list
    hometeam_list = event_data.get('hometeam', [])
    if hometeam_list:
        home_team = hometeam_list[0].get('name', '')
        home_team_image = hometeam_list[0].get('image', '')
    else:
        home_team = home_team_image = ''
    
    # Check whether 'awayteam' is present and not an empty list
    awayteam_list = event_data.get('awayteam', [])
    if awayteam_list:
        away_team = awayteam_list[0].get('name', '')
        away_team_image = awayteam_list[0].get('image', '')
    else:
        away_team = away_team_image = ''
    
    # Print or process the extracted information as needed
    print(f"Event Name: {event_name}")
    print(f"Event URL: {event_url}")
    print(f"Event Image: {event_image}")
    print(f"Start Date: {start_date}")
    print(f"End Date: {end_date}")
    print(f"Home Team: {home_team}")
    print(f"Home Team Image: {home_team_image}")
    print(f"Away Team: {away_team}")
    print(f"Away Team Image: {away_team_image}")
    print("\n")

# Close the browser
browser.quit()
Tried it like this!
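One caveat with this approach: not every application/ld+json script on the page is necessarily a match event, so json.loads(event.string) can fail or pick up unrelated structured data. A defensive sketch (the SportsEvent type check is an assumption about the page's markup):
for event in events:
    try:
        event_data = json.loads(event.string)
    except (TypeError, json.JSONDecodeError):
        continue  # skip empty or malformed script tags
    if not isinstance(event_data, dict) or event_data.get('@type') != 'SportsEvent':
        continue  # skip structured data that is not a match event
    print(event_data.get('name', ''))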
#15
(Dec-10-2023, 10:00 AM)nicoali Wrote: Tried it like this!
That's fine, and getting the JSON response can make it easier.
Also, if you use a sports betting API, you only get JSON back.

Also, this will find nothing, since right now no games are in play.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
Right now all games are in no-score mode.
no_score = soup.find_all('div', class_="match-list-item match-list-item--no-score")
>>> print(no_score[0].text)
 17:00     
        Valerenga
        
        Kristiansund BK
       (2 - 0)      

>>> print(no_score[1].text)
 15:00     
        Everton
        
        Chelsea
#16
(Dec-10-2023, 11:27 AM)snippsat Wrote: That's fine, and getting the JSON response can make it easier.

Also, this will find nothing, since right now no games are in play.
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
Right now all games are in no-score mode.

Can I mix the JSON data that I read with the approach you suggest, to obtain only live matches and red cards?
#17
(Dec-10-2023, 11:40 AM)nicoali Wrote: Can I mix the JSON data that I read with the approach you suggest, to obtain only live matches and red cards?
Maybe, with some work; the JSON data you get back isn't well organized like what you'd get from an API, it's just a bunch of script tags to loop over.
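One rough way to combine them, with that caveat in mind: build a lookup of the JSON-LD events keyed by their url field, then keep only the entries whose links appear inside in-play divs. A sketch (it assumes the match link is an a tag with class match-url inside each in-play div and that the JSON-LD url matches the link's href; the URLs may need normalizing if one is relative):
import json
from bs4 import BeautifulSoup

soup = BeautifulSoup(browser.page_source, 'lxml')
# JSON-LD events keyed by their url field
events_by_url = {}
for script in soup.find_all('script', type='application/ld+json'):
    try:
        data = json.loads(script.string)
    except (TypeError, json.JSONDecodeError):
        continue
    if isinstance(data, dict) and data.get('url'):
        events_by_url[data['url']] = data

# Keep only the events whose link shows up inside an in-play match div
for div in soup.find_all('div', class_='match-list-item--inplay'):
    link = div.find('a', class_='match-url')
    if link and link.get('href') in events_by_url:
        event = events_by_url[link['href']]
        has_red_card = div.find('div', class_='event-red-card-indicator') is not None
        print(event.get('name', ''), '| red card:', has_red_card)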
#18
So it's not possible to select all the live matches, like the Live button at the top of the football section on the website does?
#19
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

# Setup
options = Options()
# options.add_argument("--headless=new")
ser = Service(r"C:\Users\carto\Desktop\chromedriver-win64\chromedriver.exe")
browser = webdriver.Chrome(service=ser, options=options)

# Navigate to the page
url = 'https://oddspedia.com/football'
browser.get(url)

# Click the "Live" button
live_button_xpath = '//button[contains(@class, "btn-filter") and contains(text(), "Live")]'
live_button = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, live_button_xpath)))
live_button.click()

# Wait until the page has fully loaded
WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'main > div:nth-child(3) > div.ml__wrap > div:nth-child(16) > div')))

time.sleep(6)

# Get the page source after clicking the "Live" button
page_source = browser.page_source

soup = BeautifulSoup(page_source, 'html.parser')

# Find all "a" elements with the class "match-url match-url--flex"
in_play_divs = soup.find_all('a', class_="match-url match-url--flex")

# Iterate over each match link and do whatever is needed
for div in in_play_divs:
    # Do your specific scraping for each element,
    # e.g. print the text and the image inside each div
    print("Title:", div['title'])
    
    # Check that the element exists before searching further
    match_status_element = div.find('div', class_='match-status--inplay')
    if match_status_element:
        print("Status:", match_status_element.get_text(strip=True))
    else:
        print("Status not found")

    home_team_element = div.find('div', class_='match-teams').find('div', class_='match-team--home')
    if home_team_element:
        home_team_score_element = home_team_element.find('span', class_='match-score-result__score')
        if home_team_score_element:
            home_team_score = home_team_score_element.get_text(strip=True)
            print("Home Team Score:", home_team_score)
        else:
            print("Home Team Score not found")
        
        home_team_logo_element = home_team_element.find('img')
        if home_team_logo_element:
            home_team_logo = home_team_logo_element['src']
            print("Home Team Logo:", home_team_logo)
        else:
            print("Home Team Logo not found")
    else:
        print("Home Team not found")

    away_team_element = div.find('div', class_='match-teams').find('div', class_='match-team--away')
    if away_team_element:
        away_team_score_element = away_team_element.find('span', class_='match-score-result__score')
        if away_team_score_element:
            away_team_score = away_team_score_element.get_text(strip=True)
            print("Away Team Score:", away_team_score)
        else:
            print("Away Team Score not found")

        away_team_logo_element = away_team_element.find('img')
        if away_team_logo_element:
            away_team_logo = away_team_logo_element['src']
            print("Away Team Logo:", away_team_logo)
        else:
            print("Away Team Logo not found")
    else:
        print("Away Team not found")

# Close the browser at the end
browser.quit()
With this I can click the Live button, but I still cannot scrape all the matches' logos, results, and links!
#20
(Dec-11-2023, 04:44 PM)nicoali Wrote: With this I can click the Live button, but I still cannot scrape all the matches' logos, results, and links!
There are at least two states after clicking the Live button; here I did a quick test.
live_bt = browser.find_element(By.CSS_SELECTOR, 'div.match-list-sub-nav > div.match-list-filter-buttons > button:nth-child(1)')
live_bt.click()
time.sleep(3)
soup = BeautifulSoup(browser.page_source, 'lxml')
in_play = soup.find_all('div', class_="match-list-item match-list-item--inplay")
in_play_noscore = soup.find_all('div', class_="match-list-item match-list-item--inplay match-list-item--no-score")
So this is correct at the time I'm testing (late now): only 3 live matches, all with no score.
>>> len(in_play_noscore)
3
>>> len(in_play)
0
>>> print(in_play_noscore[1].text)
 
        Inplay
         
        Molynes United FC
        
        Harbour View FC
             
>>> print(in_play_noscore[2].text)
 
        Inplay
         
        AS Douanes Nouakchott
        
        Inter Nouakchott
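To handle both live states uniformly, one option is to select on the shared match-list-item--inplay class (both lists above carry it) and then check each item for the red-card indicator. A sketch, assuming the class names shown above:
# Matches both scored and no-score live items, since both carry this class
live_items = soup.select('div.match-list-item--inplay')
for item in live_items:
    red_card = item.find('div', class_='event-red-card-indicator') is not None
    print(item.get_text(strip=True), '| red card:', red_card)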

