Having Trouble With Threading - Printable Version

Hide/Show
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import threading

# Shared cursor: positional index of the next dataframe row to process.
list_counter = 0

# Load the input CSV once; `new_list` and `df` are two names for the same dataframe.
df = pd.read_csv('test.csv')
new_list = df

# Highest valid row position (row count minus one, because positions start at zero).
list_total = df.shape[0] - 1

# Guards every read/write of the shared row cursor (`list_counter`) and of the
# shared dataframe (`df`), so that concurrent workers never claim the same row,
# never write a result into another worker's row, and never lose an update when
# `df` is rebound.
_state_lock = threading.Lock()

# Seconds to wait for a server before giving up on a URL; without a timeout a
# single unresponsive host would block its worker thread forever.
_REQUEST_TIMEOUT = 30

_REQUEST_HEADERS = {"user-agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36"}

# (compiled pattern, label) pairs in check order. When several patterns match
# the same page, the LAST matching entry determines the label.
# NOTE(review): because "Ether" comes after "Ethereum" and matches every page
# that "Ethereum" matches, 'Ethereum Found' can never be the final label.
# Confirm whether that precedence is intended before reordering.
_COIN_PATTERNS = tuple(
    (re.compile(word, re.IGNORECASE), label)
    for word, label in (
        ("Ethereum", 'Ethereum Found'),
        ("Bitcoin", 'Bitcoin Found'),
        ("Ether", 'Ether Found'),
        ("Ripple", 'Ripple Found'),
        ("Qtum", 'Qtum'),
        ("Litecoin", 'Litecoin'),
        ("DigitalCash", 'DigitalCash'),
        ("Monero", 'Monero'),
        ("Zcash", 'Zcash'),
        ("Bitcash", 'Bitcash'),
        ("Stellar", 'Stellar'),
        ("IOTA", 'IOTA'),
        ("Neo", 'Neo'),
        ("Power Ledger", 'Power Ledger'),
        ("OmiseGo", 'OmiseGo'),
        ("Stratis", 'Stratis Found'),
        ("Waves", 'Waves Found'),
        ("Walton", 'Walton Found'),
        ("Hshare", 'Hshare Found'),
        ("Einsteinium", 'Einsteinium Found'),
        ("Stellar Lumens", 'Stellar Found'),
        ("Lisk", 'Lisk Found'),
        ("NEM", 'NEM Found'),
        ("MonaCoin", 'MonaCoin Found'),
    )
)


def _detect_label(root):
    """Return the label of the last pattern in _COIN_PATTERNS found under *root*, or None."""
    # Walking the table backwards and stopping at the first hit yields the same
    # result as running every check in order and letting later hits overwrite
    # earlier ones, without searching the document for the remaining patterns.
    for pattern, label in reversed(_COIN_PATTERNS):
        if root.find(string=pattern) is not None:
            return label
    return None


def worker():
    """Thread worker: scrape rows of the shared dataframe until none are left.

    Each iteration claims the next unprocessed row position from the global
    ``list_counter``, downloads the URL stored in column 0 of that row, looks
    for coin names in the page, and stores the resulting label in column 1.
    After every row, NaN cells of ``df`` are replaced with 'NONE' and the
    dataframe is written to "test2.csv".

    The function returns None once ``list_counter`` exceeds ``list_total``.
    A URL whose request fails is reported on stdout and its row is left
    without a label (so it ends up as 'NONE' like any row with no match).
    """
    global list_counter  # shared cursor, advanced under _state_lock
    global df  # rebound below when NaN cells are replaced

    while True:
        # Claim a row and read its URL in one critical section. Keeping the
        # claimed position in a local means the result is later written to the
        # row that was actually fetched, even though other threads advance
        # list_counter while this thread is waiting on the network.
        with _state_lock:
            if list_counter > list_total:
                return
            row = list_counter
            list_counter = row + 1
            url = df.iloc[row, 0]

        # Network and parsing work happens outside the lock so that threads
        # really do overlap their downloads.
        label = None
        try:
            response = requests.get(url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One bad URL must not take the whole worker thread down.
            print(row, 'request failed:', exc)
        else:
            soup = BeautifulSoup(response.content, 'html.parser')
            # Pages without a <body> element have soup.body == None; search the
            # whole document in that case instead of raising AttributeError.
            root = soup.body if soup.body is not None else soup
            label = _detect_label(root)

        # Publish the result and refresh the output file atomically: `df` is
        # rebound by replace(), so writes from other threads must not interleave.
        with _state_lock:
            if label is not None:
                df.iloc[row, 1] = label
            print(row)
            df = df.replace(np.nan, 'NONE', regex=True)  # replace all empty cells (nan) with 'NONE'
            df.to_csv("test2.csv")

# Spin up 20 threads that all run worker(); keep a handle to each in `threads`.
threads = []
for _ in range(20):
    thread = threading.Thread(target=worker)
    threads.append(thread)
    thread.start()