Python Forum
I have this coding exercise to find how much money take Barrack Obama and Romney Mitt
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
I have this coding exercise to find how much money take Barrack Obama and Romney Mitt
#1
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from collections import defaultdict

matplotlib.style.use('ggplot')
pd.set_option("display.max_columns", None)
headers=pd.read_csv('https://www.fec.gov/files/bulk-downloads/data_dictionaries/indiv_header_file.csv')
headers
fec = pd.read_csv('itcont.txt', sep="|", nrows=10, names=headers.columns)
fec.head(5)
data_types = { header: str for header in headers }
data_types['TRANSACTION_AMT'] = float
data_types
fec = pd.read_csv('itcont.txt',
              	sep="|",
              	names=headers.columns,
              	dtype=data_types)

fec.shape
cm_headers =  pd.read_csv('https://www.fec.gov/data/browse-data/files/bulk-downloads/data_dictionaries/cm_header_file.csv')
cm_headers
cm = pd.read_csv('cm.txt',
              	sep="|",
              	names=cm_headers.columns)
print(cm.shape)
cm.head(5)
fec = pd.merge(fec,
           	cm[['CMTE_ID', 'CMTE_NM', 'CAND_ID']],
           	on='CMTE_ID',
           	how='inner')

print(fec.shape)
fec.head(5)
cn_headers =  pd.read_csv('https://www.fec.gov/data/browse-data/files/bulk-downloads/data_dictionaries/cn_header_file.csv')
cn_headers
cn = pd.read_csv('cn.txt',
              	sep="|",
              	names=cn_headers.columns)
cn.head(5)
cn['CAND_NAME'].unique()
cn['CAND_NAME'].unique().shape
cn['CAND_NAME'] = cn['CAND_NAME'].astype(str)
cn[cn['CAND_NAME'].str.contains('OBAMA')]
cn[cn['CAND_NAME'].str.contains('ROMNEY')]
print(cn['CAND_NAME'].dtype)

cn[cn['CAND_NAME'].isin(['OBAMA, BARACK', 'ROMNEY, MITT / RYAN, PAUL D.'])]
cn.loc[cn['CAND_NAME'].str.contains('ROMNEY'), 'CAND_NAME'].values
cn.loc[cn['CAND_NAME'].str.contains('ROMNEY'), 'CAND_NAME'] = 'ROMNEY, MITT'
cn[cn['CAND_NAME'].str.contains('ROMNEY')]['CAND_NAME'].values
cn_bomr = cn[cn['CAND_NAME'].isin(['OBAMA, BARACK', 'ROMNEY, MITT'])]
merged = pd.merge(fec, cn_bomr[['CAND_ID', 'CAND_NAME', 'CAND_PTY_AFFILIATION']], on='CAND_ID', how='inner')
merged['CAND_PTY_AFFILIATION'].plot(kind='barh', color=('deepskyblue', 'salmon'), figsize=(10, 8))
print(merged)
fec = pd.merge(fec, cn_bomr, on='CAND_ID',
           	how='inner')

print(fec.shape)
fec.head(5)
fec['CAND_NAME'].value_counts()
(fec['TRANSACTION_AMT'] > 0).value_counts()
fec = fec.loc[fec['TRANSACTION_AMT'] > 0]
fec['OCCUPATION'].value_counts()[:20]
by_occupation = fec.pivot_table(index='OCCUPATION',
                            	columns='CAND_PTY_AFFILIATION',
                            	values='TRANSACTION_AMT',
                            	aggfunc='sum')
by_occupation
by_occupation['SUM'] = by_occupation.sum(axis=1)
by_occupation
over_10mm = by_occupation.loc[by_occupation['SUM'] >= 10**7]
over_10mm
occ_mapping = {
	'INFORMATION REQUESTED PER BEST EFFORTS' : 'NOT PROVIDED',
	'INFORMATION REQUESTED' : 'NOT PROVIDED',
	'C.E.O.': 'CEO',
	'C.E.O': 'CEO'
}

# If no mapping provided, return x
f = lambda x: occ_mapping.get(x, x)
fec.loc[:, 'OCCUPATION'] = fec.loc[:, 'OCCUPATION'].map(f)
by_occupation = fec.pivot_table(index='OCCUPATION',
                            	columns='CAND_PTY_AFFILIATION',
                            	values='TRANSACTION_AMT',
                            	aggfunc='sum')
by_occupation['SUM'] = by_occupation.sum(axis=1)
over_10mm = by_occupation.loc[by_occupation['SUM'] >= 10**7]
over_10mm
over_10mm.sort_values(by='SUM', ascending=False)
print(over_10mm.columns)
over_10mm_sorted = over_10mm.sort_values(by='SUM', ascending=True)

# Select only the desired columns 'DEM' and 'REP' after sorting
selected_columns = ['SUM']
selected_data = over_10mm_sorted[selected_columns]
plot(kind='barh', color=('deepskyblue', 'salmon'), figsize=(10, 8))
over_10mm_pct = over_10mm[['DEM', 'REP']].div(over_10mm['SUM'], axis=0)
over_10mm_pct
_ = over_10mm_pct.plot(kind='barh', color=('deepskyblue', 'salmon'), stacked=True, figsize=(10, 8))
plt.xlim((0, 1))
plt.legend(loc='center left', bbox_to_anchor=(1.01, 0.5))
fec.groupby('CAND_NAME').sum()
IndexError: index 0 is out of bounds for axis 0 with size 0 give me that error and i dont know what i can do i am stuck if anyone can help me i will be greatful.
buran write Mar-18-2024, 07:49 AM:
Please, use proper tags when post code, traceback, output, etc. This time I have added tags for you.
See BBcode help for more info.

Please, read What to include in a post
Reply


Messages In This Thread
I have this coding exercise to find how much money take Barrack Obama and Romney Mitt - by vasiliskarv - Mar-17-2024, 05:21 PM

Possibly Related Threads…
Thread Author Replies Views Last Post
  Banking system - transferring money 3DEN 2 8,977 Dec-13-2019, 09:13 AM
Last Post: 3DEN
  Money Global Variable saturnstars 8 4,392 Apr-12-2019, 10:48 AM
Last Post: ichabod801
  Money Bags Program PythonNoob123 2 7,853 Jan-23-2017, 10:42 PM
Last Post: PythonNoob123

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020