Browse Source

Combine all team batting data into a single file; add correlation script.

master
Charles Reid 7 years ago
parent
commit
c0a8fc531a
  1. 67
      AthleticsCorr.py
  2. 9
      AthleticsWHHRKDE.py
  3. 25
      CombineEverybody.py
  4. 2535
      data/master_team_batting.csv

67
AthleticsCorr.py

@ -0,0 +1,67 @@
# Progress messages bracket the pandas import (presumably because it is the
# slow one to load). The parenthesised single-argument form prints the same
# text under both Python 2 and Python 3.
print("Loading pandas...")
from pandas import *
print("Done loading pandas.")
import matplotlib.pylab as plt
import numpy as np
from scipy import stats
def load_data():
df = read_csv('data/oak_team_batting.csv',index_col='Rk')
# Data Cleanup:
# -----------------------
# Let's exclude some years:
# - 2014 (incomplete season)
# - 1994 (the baseball strike)
df = df[ df['Year'] != 1994 ]
df = df[ df['Year'] != 2014 ]
# Add in data about singles
df['1B'] = df['H'] - df['2B'] - df['3B'] - df['HR']
df['ABpG'] = df['AB']/df['G']
df['BBpG'] = df['BB']/df['G']
df['1BpG'] = df['1B']/df['G']
df['2BpG'] = df['2B']/df['G']
df['3BpG'] = df['3B']/df['G']
df['HRpG'] = df['HR']/df['G']
df['Avg'] = (df['H']/df['AB'])
return df
df = load_data()

# Time series of data
# --------------------------------
# Two side-by-side panels sharing the season year on the x-axis.
fig = plt.figure()
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

# Left panel: wins per game played and batting average.
ax1.plot(df['Year'],df['W']/df['G'],'-k',label='WinAvg')
ax1.plot(df['Year'],df['Avg'],'-g',label='BatAvg')

# Right panel: per-game rates. Each column name doubles as its legend label.
for column, line_style in (('BBpG','-b'),
                           ('1BpG','-r'),
                           ('2BpG','-g'),
                           ('3BpG','-c'),
                           ('HRpG','-k')):
    ax2.plot(df['Year'],df[column],line_style,label=column)

# The labels above are only drawn once a legend is requested; without these
# calls the curves can be told apart by colour alone.
for ax in (ax1, ax2):
    ax.set_xlabel('Year')
    ax.legend(loc='best')

plt.show()

9
AthleticsWHHRKDE.py

@ -24,6 +24,13 @@ def load_data():
# Add in data about singles
df['1B'] = df['H'] - df['2B'] - df['3B'] - df['HR']
df['ABpG'] = df['AB']/df['G']
df['BBpG'] = df['BB']/df['G']
df['1BpG'] = df['1B']/df['G']
df['2BpG'] = df['2B']/df['G']
df['3BpG'] = df['3B']/df['G']
df['HRpG'] = df['HR']/df['G']
return df
@ -71,6 +78,7 @@ plt.show()
# Hits
# -----------------------
@ -148,3 +156,4 @@ for key,title in zip(keys,titles):
plt.show()

25
CombineEverybody.py

@ -0,0 +1,25 @@
from pandas import *
# Lowercase team codes; each one names an input file
# data/<code>_team_batting.csv.
teams = ['ana','ari','atl','bal',
         'bos','chc','chw','cin',
         'cle','col','det','fla',
         'hou','kcr','lad','mil',
         'min','nym','nyy','oak',
         'phi','pit','sdp','sea',
         'sfg','stl','tbd','tex',
         'tor','wsn']

# Read every team's table and tag its rows with the team code. The frames are
# collected and concatenated once: appending to a growing DataFrame inside the
# loop re-copies all accumulated rows each time, and DataFrame.append is no
# longer available in current pandas.
frames = []
for team in teams:
    print("Processing team "+team)
    df = read_csv('data/'+team+'_team_batting.csv')
    df['Team'] = team
    frames.append(df)

# ignore_index=True renumbers the rows 0..N-1 across all teams.
master_df = concat(frames,ignore_index=True)

# index=False keeps that row number out of the CSV.
master_df.to_csv('data/master_team_batting.csv',index=False)
print("Done.")

2535
data/master_team_batting.csv

File diff suppressed because it is too large
Loading…
Cancel
Save