Browse Source

breaking out plotting stuff into separate analyze scripts. for organizational logic.

master
Charles Reid 10 years ago
parent
commit
5d4c449394
  1. 2
      .gitignore
  2. 206
      analysis/Analyze15002.py
  3. 251
      analysis/Analyze17001.py
  4. 108
      analysis/Analyze25013.py
  5. 138
      analysis/Analyze25070.py
  6. 250
      analysis/OneCity_Univariate.py
  7. 66
      analysis/Table25013.py

2
.gitignore vendored

@ -1,3 +1,5 @@ @@ -1,3 +1,5 @@
*.file
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

206
analysis/OneCity_Scatter.py → analysis/Analyze15002.py

@ -12,10 +12,7 @@ from scipy import stats @@ -12,10 +12,7 @@ from scipy import stats
import metro as mt
do_univariate_kde = True
cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
cities = ['Seattle']
def main():
@ -30,7 +27,6 @@ def do_15002(): @@ -30,7 +27,6 @@ def do_15002():
# Seaborn
sns.set_palette("deep", desat=.6)
sns.set_context(rc={"figure.figsize": (8, 4)})
c1, c2, c3 = sns.color_palette("Set1", 3)
# Mongo
client = MongoClient()
@ -72,8 +68,72 @@ def do_15002(): @@ -72,8 +68,72 @@ def do_15002():
df = Table15002(df)
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
xcp = sns.xkcd_palette(colors)
#######################
# Bar plot:
#
# pct of population in each ed category
f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(8, 8))
# ------------------------
# Subplot 1
ax1.set_title(cities[0])
#ddm = {}
#ddf = {}
ddt = {}
for i in range(1,5+1):
#ddm[str(i)] = df["M_EdCat%d_Pct"%(i)].values
#ddf[str(i)] = df["F_EdCat%d_Pct"%(i)].values
ddt[str(i)] = df["EdCat%d_Pct"%(i)].values
######################
######################
### Wrong!!!
### yy is a vector
### this only plots
### the first element
######################
######################
xx = np.array(ddt.keys())
yy = np.array(ddt.values())
sns.barplot(xx,yy,palette=xcp,ax=ax1)
#xx = np.array(ddm.keys())
#yy = np.array(ddm.values())
#sns.barplot(xx,yy,palette=xcp,ax=ax2)
#xx = np.array(ddf.keys())
#yy = np.array(ddf.values())
#sns.barplot(xx,yy,palette=xcp,ax=ax3)
plt.show()
plt.draw()
#######################
# Scatter plot:
#
# average vs variance
# average vs gender imbalance
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
@ -134,6 +194,9 @@ def do_15002(): @@ -134,6 +194,9 @@ def do_15002():
#######################
# Scatter plot:
#
# variances vs averages
# variances vs gender imbalance
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
@ -186,131 +249,28 @@ def do_15002(): @@ -186,131 +249,28 @@ def do_15002():
sns.regplot(df[xcode],df[ycode], color=c3, ax=ax2, fit_reg=False)
plt.draw()
plt.show()
def do_17001():
from Table17001 import Table17001
# Seaborn
sns.set_palette("deep", desat=.6)
sns.set_context(rc={"figure.figsize": (8, 4)})
c1, c2, c3 = sns.color_palette("Set1", 3)
# Mongo
client = MongoClient()
db = client['metros']
metaprops = db['PropertiesMeta']
props = db['Properties']
for city in cities:
#########################
# Mongo lookup
cbsa = mt.CBSACode(city)[0]
pre_search = metaprops.find_one({'geoid':cbsa})
mongo_search = props.find({'$and': [
{'metroid':cbsa},
{'geoid':{'$nin':[cbsa]}}
]
})
if 'B17001' not in pre_search['tables']:
print "Error: could not find table B17001 for city",city,"in db."
continue
if mongo_search.count()==0:
print "Error: could not find city",city,"in db."
continue
df = pd.DataFrame([])
srch = list(mongo_search)
for i,r in enumerate(srch):
if cbsa in r['geoid']:
del srch[i]
break
df = df.append(srch)
df = Table17001(df)
#######################
# Scatter plots:
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
# ------------------------
# Subplot 1
ax1.set_title(cities[0])
#xcode = 'B17001003'
#xlabel = metaprops.find_one({'code':xcode})['name']
#ycode = 'B17001031'
#ylabel = metaprops.find_one({'code':ycode})['name']
xcode = 'TotalPop'
xlabel=xcode
ycode = 'M_P_PctM'
ylabel=ycode
sns.regplot(df[xcode],df[ycode], color=c2, ax=ax1, fit_reg=False)
xcode = 'TotalPop'
xlabel=xcode
ycode = 'F_P_PctF'
ylabel=ycode
sns.regplot(df[xcode],df[ycode], color=c1, ax=ax1, fit_reg=False)
xlim = (min(df[xcode]),
max(df[xcode]))
ylim = (min(df[ycode]),
max(df[ycode]))
ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
ax1.set_xlabel(xlabel)
ax1.set_ylabel('Pct of M/F Pop in Pov')
# ------------------------
# Subplot 2
xcode = 'TotalPop'
xlabel=xcode
ycode = 'SrM_P_PctM'
ylabel=ycode
sns.regplot(df[xcode],df[ycode], color=c2, ax=ax2, fit_reg=False)
#sns.regplot(df[xcode],df[ycode] - ((df['TotalPop']*df[ycode])/df['TotalPop']), color=c1, ax=ax2, fit_reg=False)
xcode = 'TotalPop'
xlabel=xcode
ycode = 'SrF_P_PctF'
ylabel=ycode
sns.regplot(df[xcode],df[ycode], color=c1, ax=ax2, fit_reg=False)
#sns.regplot(df[xcode],abs(df[ycode] - df[ycode].mean()), color=c1, ax=ax2, fit_reg=False)
xlim = (min(df[xcode]),
max(df[xcode]))
ylim = (min(df[ycode]),
max(df[ycode]))
ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
ax2.set_xlabel(xlabel)
#ax2.set_ylabel('Pct of M/F Pre-Adol. Pop in Pov')
plt.draw()
plt.show()

251
analysis/Analyze17001.py

@ -0,0 +1,251 @@ @@ -0,0 +1,251 @@
from pymongo import MongoClient
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import metro as mt
do_univariate_kde = True
cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
def main():
    """Script entry point: produce the table B17001 plots."""
    do_17001()
def _scatter_vs_population(df, ax, xcode, ycodes, colors, ylabel):
    """Scatter every column of *ycodes* against *xcode* on *ax*, fitting the axes to all series."""
    ycodes = list(ycodes)
    for ycode, color in zip(ycodes, colors):
        sns.regplot(df[xcode], df[ycode], color=color, ax=ax, fit_reg=False)
    ax.set_xlim((df[xcode].min(), df[xcode].max()))
    # Limits span *all* plotted series; taking them from the last series
    # only would clip points belonging to the other one.
    ax.set_ylim((df[ycodes].min().min(), df[ycodes].max().max()))
    ax.set_xlabel(xcode)
    ax.set_ylabel(ylabel)


def _kde_overall_and_by_gender(df, overall, male, female, colors):
    """Draw one figure: KDE of *overall* on top, KDEs of *male*/*female* below."""
    c1, c2, c3 = colors
    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
    sns.kdeplot(df[overall], shade=True, color=c3, ax=ax1)
    sns.kdeplot(df[male], shade=True, color=c2, ax=ax2)
    sns.kdeplot(df[female], shade=True, color=c1, ax=ax2)
    # The plotted columns are fractions, so pin the x axis to [0, 1].
    ax1.set_xlim([0, 1])
    ax2.set_xlim([0, 1])
    return f


def do_17001():
    """Plot table B17001 (poverty) statistics for every city in ``cities``.

    For each city the records of the 'Properties' collection belonging to
    the city's CBSA are loaded from the local 'metros' Mongo database,
    passed through ``Table17001`` and plotted:

    * scatter: total population vs. male/female poverty share, and
      total population vs. male/female senior poverty share;
    * KDEs of the poverty share overall and by gender, for the whole
      population, pre-adolescents and seniors.

    Cities whose metadata or records are missing are reported on stdout
    and skipped.
    """
    from Table17001 import Table17001

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    c1, c2, c3 = sns.color_palette("Set1", 3)

    # Mongo
    client = MongoClient()
    db = client['metros']
    metaprops = db['PropertiesMeta']
    props = db['Properties']

    for city in cities:

        #########################
        # Mongo lookup
        cbsa = mt.CBSACode(city)[0]
        pre_search = metaprops.find_one({'geoid': cbsa})

        # find_one() returns None when there is no metadata document at all.
        if pre_search is None or 'B17001' not in pre_search['tables']:
            print("Error: could not find table B17001 for city %s in db." % (city,))
            continue

        # Materialise the cursor once; Cursor.count() no longer exists in
        # current PyMongo releases.
        srch = list(props.find({'$and': [
            {'metroid': cbsa},
            {'geoid': {'$nin': [cbsa]}}
        ]}))
        if not srch:
            print("Error: could not find city %s in db." % (city,))
            continue

        # Drop the first record whose geoid contains the CBSA code itself.
        for i, r in enumerate(srch):
            if cbsa in r['geoid']:
                del srch[i]
                break

        # Build the frame directly (DataFrame.append is gone in pandas 2).
        df = Table17001(pd.DataFrame(srch))

        #######################
        # Scatter plots:
        #
        # Total population vs male/female poverty rate
        # Total population vs male/female senior poverty rate
        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
        ax1.set_title(city)
        _scatter_vs_population(df, ax1, 'TotalPop',
                               ['M_P_PctM', 'F_P_PctF'], [c2, c1],
                               'Pct of M/F Pop in Pov')
        _scatter_vs_population(df, ax2, 'TotalPop',
                               ['SrM_P_PctM', 'SrF_P_PctF'], [c2, c1],
                               'Pct M/F Seniors in Poverty')
        plt.draw()
        plt.show()

        #######################
        # KDE plots: poverty share overall (top) and by gender (bottom)
        palette = (c1, c2, c3)
        # Whole population
        _kde_overall_and_by_gender(df, 'P_Pct', 'M_P_PctM', 'F_P_PctF', palette)
        # Pre-adolescents
        _kde_overall_and_by_gender(df, 'PAP_Pct', 'PAM_P_PctM', 'PAF_P_PctF', palette)
        # NOTE: a juvenile variant (JuvP_Pct / JuvM_P_PctM / JuvF_P_PctF)
        # was sketched here in the past but is disabled.
        # Seniors
        _kde_overall_and_by_gender(df, 'SrP_Pct', 'SrM_P_PctM', 'SrF_P_PctF', palette)

        # The KDE figures are created after the previous show() call, so
        # they need their own show() to ever appear on screen.
        plt.show()
if __name__=="__main__":
main()

108
analysis/Analyze25013.py

@ -0,0 +1,108 @@ @@ -0,0 +1,108 @@
from pymongo import MongoClient
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import metro as mt
do_univariate_kde = True
cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
def main():
    """Script entry point: produce the table B25013 plots."""
    do_25013()
def do_25013():
    """Plot owner-vs-renter housing shares (table B25013) for every city in ``cities``.

    For each city the records of the 'Properties' collection belonging to
    the city's CBSA are loaded from the local 'metros' Mongo database,
    passed through ``Table25013`` and shown as a joint KDE of the
    'HousePct' and 'RentPct' columns.

    Cities whose metadata or records are missing are reported on stdout
    and skipped.
    """
    from Table25013 import Table25013

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    # Mongo
    client = MongoClient()
    db = client['metros']
    metaprops = db['PropertiesMeta']
    props = db['Properties']

    for city in cities:

        #########################
        # Mongo lookup
        cbsa = mt.CBSACode(city)[0]
        pre_search = metaprops.find_one({'geoid': cbsa})

        # This function consumes table B25013, so that is the table whose
        # presence must be verified (the check used to name B25070).
        # find_one() returns None when there is no metadata document at all.
        if pre_search is None or 'B25013' not in pre_search['tables']:
            print("Error: could not find table B25013 for city %s in db." % (city,))
            continue

        # Materialise the cursor once; Cursor.count() no longer exists in
        # current PyMongo releases.
        srch = list(props.find({'$and': [
            {'metroid': cbsa},
            {'geoid': {'$nin': [cbsa]}}
        ]}))
        if not srch:
            print("Error: could not find city %s in db." % (city,))
            continue

        # Drop the first record whose geoid contains the CBSA code itself.
        for i, r in enumerate(srch):
            if cbsa in r['geoid']:
                del srch[i]
                break

        # Build the frame directly (DataFrame.append is gone in pandas 2).
        df = Table25013(pd.DataFrame(srch))

        #######################
        # Joint KDE: renters vs owners
        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
        ax1.set_title(city)
        cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
        sns.kdeplot(df['HousePct'], df['RentPct'], shade=True, cmap=cmap, ax=ax1)
        # Both axes are fractions of the total.
        ax1.set_xlim([0, 1])
        ax1.set_ylim([0, 1])

        #######################
        # TODO: second figure is an empty 1x2 placeholder; nothing is
        # drawn on it yet.
        f, ax1 = plt.subplots(1, 2, sharex=True, figsize=(12, 6))

        plt.show()
        plt.draw()
if __name__=="__main__":
main()

138
analysis/OneCity_Bar.py → analysis/Analyze25070.py

@ -89,7 +89,6 @@ def do_25070(): @@ -89,7 +89,6 @@ def do_25070():
xx = np.array(ddt.keys())
yy = np.array(ddt.values())
import pdb; pdb.set_trace()
sns.barplot(xx,yy,palette="Set1",ax=ax1)
#xx = np.array(ddm.keys())
@ -105,111 +104,91 @@ def do_25070(): @@ -105,111 +104,91 @@ def do_25070():
#######################
# univariate KDE
#
# density of (rent as pct of income) categories
f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
# ------------------------
# Subplot 1
ax1.set_title(cities[0])
labels = ["0-10","10-15","15-20","20-25","25-30","30-35","35-40","40-50","50+"]
colpal = sns.color_palette("GnBu_d",len(labels))
def do_15002():
from Table15002 import Table15002
ddt = {}
for i,lab in enumerate(labels):
tlab = "Rent_"+lab+"_Pct"
sns.kdeplot(df[tlab], shade=True, color=colpal[i], ax=ax1)
ax1.set_xlim([0.001,1.0])
# Seaborn
sns.set_palette("deep", desat=.6)
sns.set_context(rc={"figure.figsize": (8, 4)})
# Mongo
client = MongoClient()
db = client['metros']
metaprops = db['PropertiesMeta']
props = db['Properties']
for city in cities:
#########################
# Mongo lookup
cbsa = mt.CBSACode(city)[0]
pre_search = metaprops.find_one({'geoid':cbsa})
mongo_search = props.find({'$and': [
{'metroid':cbsa},
{'geoid':{'$nin':[cbsa]}}
]
})
if 'B15002' not in pre_search['tables']:
print "Error: could not find table B15002 for city",city,"in db."
continue
if mongo_search.count()==0:
print "Error: could not find city",city,"in db."
continue
#######################
# joint KDE
#
# joint density of
# (rent 10-15% of income)
# (rent 40-50% of income)
df = pd.DataFrame([])
srch = list(mongo_search)
for i,r in enumerate(srch):
if cbsa in r['geoid']:
del srch[i]
break
df = df.append(srch)
df = Table15002(df)
#f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
#ax1.set_title(cities[0])
#cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
#tlab1 = "Rent_10-15_Pct"
#tlab2 = "Rent_40-50_Pct"
#sns.kdeplot(df[tlab1], df[tlab2], shade=True, cmap=cmap, ax=ax1)
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
xcp = sns.xkcd_palette(colors)
#######################
# Bar plot:
f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(8, 8))
# pair grid
# multivariate whiz-bang plot
#
# joint scatterplots
# univariate kdes
# joint kdes
# (x/y limits default to -3, +3)
# ------------------------
# Subplot 1
ax1.set_title(cities[0])
#ddm = {}
#ddf = {}
ddt = {}
for i in range(1,5+1):
#ddm[str(i)] = df["M_EdCat%d_Pct"%(i)].values
#ddf[str(i)] = df["F_EdCat%d_Pct"%(i)].values
ddt[str(i)] = df["EdCat%d_Pct"%(i)].values
#columnlabels = ["Rent_%s_Pct"%(lab) for lab in labels]
#cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
#g = sns.PairGrid(df[columnlabels], diag_sharey=False)
#g.map_lower(sns.kdeplot, cmap=cmap)
#g.map_upper(plt.scatter)
#g.map_diag(sns.kdeplot, lw=2)
######################
######################
### Wrong!!!
### yy is a vector
### this only plots
### the first element
######################
######################
##########################
# pair grid
#
# grouped to have fewer variables
#new_labels = ["0-10","10-20","20-30","30-40","40-50","50+"]
#colpal = sns.color_palette("GnBu_d",len(new_labels))
#columnlabels = ["NewRent_%s_Pct"%(lab) for lab in new_labels]
#cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
#g = sns.PairGrid(df[columnlabels], diag_sharey=False)
#g.map_lower(sns.kdeplot, cmap=cmap)
#g.map_upper(plt.scatter)
#g.map_diag(sns.kdeplot, lw=2)
xx = np.array(ddt.keys())
yy = np.array(ddt.values())
sns.barplot(xx,yy,palette=xcp,ax=ax1)
#xx = np.array(ddm.keys())
#yy = np.array(ddm.values())
#sns.barplot(xx,yy,palette=xcp,ax=ax2)
#xx = np.array(ddf.keys())
#yy = np.array(ddf.values())
#sns.barplot(xx,yy,palette=xcp,ax=ax3)
plt.show()
plt.draw()
plt.draw()
plt.show()
@ -219,4 +198,3 @@ def do_15002(): @@ -219,4 +198,3 @@ def do_15002():
if __name__=="__main__":
main()

250
analysis/OneCity_Univariate.py

@ -1,250 +0,0 @@ @@ -1,250 +0,0 @@
from pymongo import MongoClient
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import metro as mt
do_univariate_kde = True
cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
def main():
    """Script entry point: produce the table B25070 plots."""
    do_25070()
def do_25070():
    """Plot rent-as-share-of-income densities (table B25070) for every city in ``cities``.

    For each city the records of the 'Properties' collection belonging to
    the city's CBSA are loaded from the local 'metros' Mongo database,
    passed through ``Table25070`` and one KDE per ``Rent_<bracket>_Pct``
    column is drawn on a shared axis.

    Cities whose metadata or records are missing are reported on stdout
    and skipped.
    """
    from Table25070 import Table25070

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    # Mongo
    client = MongoClient()
    db = client['metros']
    metaprops = db['PropertiesMeta']
    props = db['Properties']

    # Bracket labels used in the Rent_<label>_Pct column names.
    labels = ["0-10", "10-15", "15-20", "20-25", "25-30",
              "30-35", "35-40", "40-50", "50+"]

    for city in cities:

        #########################
        # Mongo lookup
        cbsa = mt.CBSACode(city)[0]
        pre_search = metaprops.find_one({'geoid': cbsa})

        # find_one() returns None when there is no metadata document at all.
        if pre_search is None or 'B25070' not in pre_search['tables']:
            print("Error: could not find table B25070 for city %s in db." % (city,))
            continue

        # Materialise the cursor once; Cursor.count() no longer exists in
        # current PyMongo releases.
        srch = list(props.find({'$and': [
            {'metroid': cbsa},
            {'geoid': {'$nin': [cbsa]}}
        ]}))
        if not srch:
            print("Error: could not find city %s in db." % (city,))
            continue

        # Drop the first record whose geoid contains the CBSA code itself.
        for i, r in enumerate(srch):
            if cbsa in r['geoid']:
                del srch[i]
                break

        # Build the frame directly (DataFrame.append is gone in pandas 2).
        df = Table25070(pd.DataFrame(srch))

        #######################
        # Univariate KDEs, one per rent bracket
        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
        ax1.set_title(city)

        colpal = sns.color_palette("GnBu_d", len(labels))
        for color, lab in zip(colpal, labels):
            sns.kdeplot(df["Rent_" + lab + "_Pct"], shade=True, color=color, ax=ax1)

        ax1.set_xlim([0.0, 1.0])

        plt.show()
        plt.draw()
def do_17001():
    """Plot poverty-share KDEs (table B17001) for every city in ``cities``.

    Does nothing beyond the seaborn/Mongo setup unless the module-level
    flag ``do_univariate_kde`` is true.  For each city the records of the
    'Properties' collection belonging to the city's CBSA are loaded from
    the local 'metros' Mongo database, passed through ``Table17001`` and
    three figures are drawn (whole population, pre-adolescents, seniors),
    each with the overall poverty share on top and the male/female shares
    below.

    Cities whose metadata or records are missing are reported on stdout
    and skipped.
    """
    from Table17001 import Table17001

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    c1, c2, c3 = sns.color_palette("Set1", 3)

    # Mongo
    client = MongoClient()
    db = client['metros']
    metaprops = db['PropertiesMeta']
    props = db['Properties']

    if not do_univariate_kde:
        return

    # (overall, male, female) column triples, one figure each.
    # NOTE: a juvenile triple (JuvP_Pct / JuvM_P_PctM / JuvF_P_PctF) was
    # sketched in the past but is disabled.
    figure_columns = [
        ('P_Pct', 'M_P_PctM', 'F_P_PctF'),          # whole population
        ('PAP_Pct', 'PAM_P_PctM', 'PAF_P_PctF'),    # pre-adolescents
        ('SrP_Pct', 'SrM_P_PctM', 'SrF_P_PctF'),    # seniors
    ]

    for city in cities:

        #########################
        # Mongo lookup
        cbsa = mt.CBSACode(city)[0]
        pre_search = metaprops.find_one({'geoid': cbsa})

        # find_one() returns None when there is no metadata document at all.
        if pre_search is None or 'B17001' not in pre_search['tables']:
            print("Error: could not find table B17001 for city %s in db." % (city,))
            continue

        # Materialise the cursor once; Cursor.count() no longer exists in
        # current PyMongo releases.
        srch = list(props.find({'$and': [
            {'metroid': cbsa},
            {'geoid': {'$nin': [cbsa]}}
        ]}))
        if not srch:
            print("Error: could not find city %s in db." % (city,))
            continue

        # Drop the first record whose geoid contains the CBSA code itself.
        for i, r in enumerate(srch):
            if cbsa in r['geoid']:
                del srch[i]
                break

        # Build the frame directly (DataFrame.append is gone in pandas 2).
        df = Table17001(pd.DataFrame(srch))

        #######################
        # KDE plots: overall share on top, male/female shares below
        for overall, male, female in figure_columns:
            f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
            sns.kdeplot(df[overall], shade=True, color=c3, ax=ax1)
            sns.kdeplot(df[male], shade=True, color=c2, ax=ax2)
            sns.kdeplot(df[female], shade=True, color=c1, ax=ax2)
            # The plotted columns are fractions, so pin the x axis to [0, 1].
            ax1.set_xlim([0, 1])
            ax2.set_xlim([0, 1])

        plt.show()
        plt.draw()
if __name__=="__main__":
main()

66
analysis/Table25013.py

@ -0,0 +1,66 @@ @@ -0,0 +1,66 @@
from pymongo import MongoClient
import numpy as np
import pandas as pd
def Table25013(df):
    """Add owner/renter totals and shares derived from ACS table B25013.

    B25013 : Tenure by Educational Attainment of Householder

        B25013001 : Total:
        B25013002 : Owner-occupied housing units:
        B25013003 :     Less than high school graduate
        B25013004 :     High school graduate (including equivalency)
        B25013005 :     Some college or associate's degree
        B25013006 :     Bachelor's degree or higher
        B25013007 : Renter-occupied housing units:
        B25013008 :     Less than high school graduate
        B25013009 :     High school graduate (including equivalency)
        B25013010 :     Some college or associate's degree
        B25013011 :     Bachelor's degree or higher
        B25013universe : Occupied Housing Units
        B25013yr : 2013

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least the columns 'B25013001', 'B25013002' and
        'B25013007'.  The derived columns are also written into this
        frame in place.

    Returns
    -------
    pandas.DataFrame
        A NaN-free copy of ``df`` (NaN replaced by 0) with the extra
        columns 'TotalPop', 'HousePop', 'RentPop', 'HousePct' and
        'RentPct'.

    Raises
    ------
    KeyError
        If one of the required B25013 total columns is missing.
    """
    # tenure by educ attainment: total, owner-occupied, renter-occupied
    total_fields = np.array(['B25013001',
                             'B25013002',
                             'B25013007'])

    # Per-education-level breakdown (not used by the computations below yet).
    # col1: house
    # col2: rent
    # The rent column belongs to table B25013 (008-011), not B25070.
    table_fields = np.array([['B25013003', 'B25013008'],  # Less than high school graduate
                             ['B25013004', 'B25013009'],  # High school graduate (including equivalency)
                             ['B25013005', 'B25013010'],  # Some college or associate's degree
                             ['B25013006', 'B25013011'],  # Bachelor's degree or higher
                             ])

    tot_pop = df[total_fields[0]]
    house_pop = df[total_fields[1]]
    rent_pop = df[total_fields[2]]

    df['TotalPop'] = tot_pop
    df['HousePop'] = house_pop
    df['RentPop'] = rent_pop

    # Shares of all occupied units; 0/0 rows become NaN here and are
    # zeroed by the fillna() below.
    df['HousePct'] = house_pop / tot_pop
    df['RentPct'] = rent_pop / tot_pop

    df = df.fillna(0)

    return df
Loading…
Cancel
Save