import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import threading
# Position of the next CSV row to process; worker() advances it.
list_counter = 0

# Load the input sheet. `df` and `new_list` are two names for the same
# DataFrame object (no copy is made).
df = pd.read_csv('test.csv')
new_list = df

# Highest valid row position: row positions are zero-based, so this is the
# row count minus one (-1 when the CSV has no data rows).
list_total = df.shape[0] - 1
# (search term, label written to column 1), in the order the checks are
# applied. When several terms occur on the same page the LAST matching entry
# wins, e.g. "Ether" overrides "Ethereum" and "Stellar Lumens" overrides
# "Stellar". This ordering is kept deliberately so output stays unchanged.
_COIN_LABELS = (
    ("Ethereum", "Ethereum Found"),
    ("Bitcoin", "Bitcoin Found"),
    ("Ether", "Ether Found"),
    ("Ripple", "Ripple Found"),
    ("Qtum", "Qtum"),
    ("Litecoin", "Litecoin"),
    ("DigitalCash", "DigitalCash"),
    ("Monero", "Monero"),
    ("Zcash", "Zcash"),
    ("Bitcash", "Bitcash"),
    ("Stellar", "Stellar"),
    ("IOTA", "IOTA"),
    ("Neo", "Neo"),
    ("Power Ledger", "Power Ledger"),
    ("OmiseGo", "OmiseGo"),
    ("Stratis", "Stratis Found"),
    ("Waves", "Waves Found"),
    ("Walton", "Walton Found"),
    ("Hshare", "Hshare Found"),
    ("Einsteinium", "Einsteinium Found"),
    ("Stellar Lumens", "Stellar Found"),
    ("Lisk", "Lisk Found"),
    ("NEM", "NEM Found"),
    ("MonaCoin", "MonaCoin Found"),
)

# Compile every pattern once at import time instead of once per page.
_COIN_PATTERNS = tuple(
    (re.compile(term, re.IGNORECASE), label) for term, label in _COIN_LABELS
)

_USER_AGENT = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36"

# Seconds to wait for a server before giving up on a URL; without a timeout
# a single stalled connection blocks its thread forever.
_REQUEST_TIMEOUT = 30

# Guards the shared module state (`list_counter` and `df`) across threads.
_state_lock = threading.Lock()


def _claim_next_row():
    """Atomically reserve the next unprocessed row position, or None when done."""
    global list_counter
    with _state_lock:
        if list_counter > list_total:
            return None
        row = list_counter
        list_counter = row + 1
        return row


def _find_label(root):
    """Return the label of the last entry in _COIN_PATTERNS found under root, or None."""
    # Scanning in reverse and stopping at the first hit gives the same result
    # as running every check in order and letting later hits overwrite
    # earlier ones, without walking the tree once per term.
    for pattern, label in reversed(_COIN_PATTERNS):
        if root.find(string=pattern) is not None:
            return label
    return None


def worker():
    """Thread worker: scrape the URLs in column 0 of `df` and label column 1.

    Each iteration claims one row position under a lock, so concurrent workers
    never process the same row and never index past `list_total`. The page at
    the row's URL is fetched, its text nodes are searched for the known coin
    names, and the label of the last matching entry is stored in column 1 of
    that same row. Rows whose request fails are reported on stdout and left
    unlabelled.

    When no rows remain, empty cells are replaced with 'NONE', the module-level
    `df` is rebound to that result, and it is written to "test2.csv". Every
    worker does this on exit; the one that finishes last writes the complete
    file.

    Returns:
        None. Results are delivered through `df` and "test2.csv".
    """
    global df
    while True:
        row = _claim_next_row()
        if row is None:
            break

        with _state_lock:
            url = df.iloc[row, 0]

        try:
            scrape = requests.get(
                url,
                headers={"user-agent": _USER_AGENT},
                timeout=_REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # One unreachable or malformed URL must not kill the thread and
            # lose the remaining rows.
            print("row %d: request failed: %s" % (row, exc))
            continue

        soup = BeautifulSoup(scrape.content, 'html.parser')
        # Responses without a <body> (e.g. non-HTML payloads) are searched
        # from the document root instead of raising AttributeError.
        root = soup if soup.body is None else soup.body

        label = _find_label(root)
        if label is not None:
            with _state_lock:
                df.iloc[row, 1] = label
        print(row)

    with _state_lock:
        # Replace all empty cells (NaN) with 'NONE' before writing the output.
        df = df.replace(np.nan, 'NONE', regex=True)
        df.to_csv("test2.csv")
# Start a fixed pool of 20 worker threads; each one pulls rows until the
# shared counter runs past the last row.
threads = []
for i in range(20):
    t = threading.Thread(target=worker)
    threads.append(t)
    t.start()

# Wait for every worker so the script only continues (and exits) once all
# rows have been processed and the output file has been written.
for t in threads:
    t.join()