7 changed files with 568 additions and 453 deletions
@ -0,0 +1,251 @@
@@ -0,0 +1,251 @@
|
||||
from pymongo import MongoClient |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import matplotlib.pyplot as plt |
||||
import seaborn as sns |
||||
|
||||
import statsmodels.api as sm |
||||
from scipy import stats |
||||
|
||||
import metro as mt |
||||
|
||||
|
||||
|
||||
# Flag for the univariate KDE plots (not read by the code visible in this file).
do_univariate_kde = True

# Metro areas to process; the other candidates are currently switched off.
cities = [
    'Seattle',
    # 'Houston', 'Los Angeles', 'New York', 'Chicago',
]
||||
|
||||
|
||||
def main():
    """Script entry point: run the B17001 plotting routine."""
    do_17001()
||||
|
||||
|
||||
|
||||
|
||||
def do_17001():
    """Plot table B17001 (poverty) statistics for every metro in ``cities``.

    For each city the function looks up its CBSA code, loads the matching
    records from the ``metros`` Mongo database, turns them into a DataFrame
    via ``Table17001`` and builds:

    * one scatter figure of total population against the male/female
      poverty share (top) and the male/female senior poverty share (bottom);
    * three KDE figures (everyone, pre-adolescents, seniors), each with the
      overall share on top and the male/female shares below.

    Cities that are missing from the database, or that lack table B17001,
    are reported on stdout and skipped.  All figures are displayed once, at
    the end, so that none of them is created after the last ``plt.show()``.
    """
    from Table17001 import Table17001

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    c1, c2, c3 = sns.color_palette("Set1", 3)

    # (y-axis label, male column, female column) for the two scatter panels.
    scatter_panels = [
        ('Pct of M/F Pop in Pov', 'M_P_PctM', 'F_P_PctF'),
        ('Pct M/F Seniors in Poverty', 'SrM_P_PctM', 'SrF_P_PctF'),
    ]

    # (overall column, male column, female column) for each KDE figure.
    # The juvenile panel (JuvP_Pct / JuvM_P_PctM / JuvF_P_PctF) is
    # intentionally left out; it was disabled in the original script.
    kde_panels = [
        ('P_Pct', 'M_P_PctM', 'F_P_PctF'),
        ('PAP_Pct', 'PAM_P_PctM', 'PAF_P_PctF'),
        ('SrP_Pct', 'SrM_P_PctM', 'SrF_P_PctF'),
    ]

    # Mongo
    client = MongoClient()
    try:
        db = client['metros']
        metaprops = db['PropertiesMeta']
        props = db['Properties']

        for city in cities:

            #########################
            # Mongo lookup

            cbsa = mt.CBSACode(city)[0]

            pre_search = metaprops.find_one({'geoid': cbsa})

            mongo_search = props.find({'$and': [
                {'metroid': cbsa},
                {'geoid': {'$nin': [cbsa]}},
            ]})

            # find_one() returns None when the metro has no metadata record.
            tables = pre_search.get('tables', ()) if pre_search else ()
            if 'B17001' not in tables:
                print("Error: could not find table B17001 for city %s in db." % city)
                continue

            # Materialise the cursor once; Cursor.count() is gone from
            # current pymongo releases.
            srch = list(mongo_search)
            if not srch:
                print("Error: could not find city %s in db." % city)
                continue

            # Drop the first record whose geoid contains the CBSA code
            # (presumably the metro-level summary row -- TODO confirm).
            for i, r in enumerate(srch):
                if cbsa in r['geoid']:
                    del srch[i]
                    break

            df = Table17001(pd.DataFrame(srch))

            #######################
            # Scatter plots:
            #
            # Total population vs male/female poverty rate
            # Total population vs male/female senior poverty rate

            xcode = 'TotalPop'
            f, axes = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
            axes[0].set_title(city)

            for ax, (ylabel, male_code, female_code) in zip(axes, scatter_panels):
                sns.regplot(x=df[xcode], y=df[male_code],
                            color=c2, ax=ax, fit_reg=False)
                sns.regplot(x=df[xcode], y=df[female_code],
                            color=c1, ax=ax, fit_reg=False)

                # Fit the axes to BOTH series so neither gender is clipped.
                both = df[[male_code, female_code]]
                ax.set_xlim((df[xcode].min(), df[xcode].max()))
                ax.set_ylim((both.min().min(), both.max().max()))

                ax.set_xlabel(xcode)
                ax.set_ylabel(ylabel)

            #######################
            # KDE plots:
            # share of people in poverty overall (top) and by gender
            # (bottom) for everyone, pre-adolescents and seniors.

            for total_code, male_code, female_code in kde_panels:
                f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
                sns.kdeplot(df[total_code], shade=True, color=c3, ax=ax1)
                sns.kdeplot(df[male_code], shade=True, color=c2, ax=ax2)
                sns.kdeplot(df[female_code], shade=True, color=c1, ax=ax2)

                ax1.set_xlim([0, 1])
                ax2.set_xlim([0, 1])
    finally:
        client.close()

    plt.draw()
    plt.show()
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Run the plots only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
||||
|
||||
|
@ -0,0 +1,108 @@
@@ -0,0 +1,108 @@
|
||||
from pymongo import MongoClient |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import matplotlib.pyplot as plt |
||||
import seaborn as sns |
||||
|
||||
import statsmodels.api as sm |
||||
from scipy import stats |
||||
|
||||
import metro as mt |
||||
|
||||
|
||||
# Flag for the univariate KDE plots (not read by the code visible in this file).
do_univariate_kde = True

# Metro areas to process; the other candidates are currently switched off.
cities = [
    'Seattle',
    # 'Houston', 'Los Angeles', 'New York', 'Chicago',
]
||||
|
||||
def main():
    """Script entry point: run the B25013 plotting routine."""
    do_25013()
||||
|
||||
|
||||
|
||||
def do_25013():
    """Plot owner vs. renter shares (table B25013) for every metro in ``cities``.

    For each city the function looks up its CBSA code, loads the matching
    records from the ``metros`` Mongo database, derives ``HousePct`` and
    ``RentPct`` via ``Table25013`` and draws their bivariate KDE on a unit
    square.  A second, still empty, 1x2 figure is created as a placeholder.

    Cities that are missing from the database, or that lack table B25013,
    are reported on stdout and skipped.
    """
    from Table25013 import Table25013

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    # Mongo
    client = MongoClient()
    try:
        db = client['metros']
        metaprops = db['PropertiesMeta']
        props = db['Properties']

        for city in cities:

            #########################
            # Mongo lookup

            cbsa = mt.CBSACode(city)[0]

            pre_search = metaprops.find_one({'geoid': cbsa})

            mongo_search = props.find({'$and': [
                {'metroid': cbsa},
                {'geoid': {'$nin': [cbsa]}},
            ]})

            # This routine consumes B25013 fields, so that is the table that
            # must be present (the check used to test for B25070).
            # find_one() returns None when the metro has no metadata record.
            tables = pre_search.get('tables', ()) if pre_search else ()
            if 'B25013' not in tables:
                print("Error: could not find table B25013 for city %s in db." % city)
                continue

            # Materialise the cursor once; Cursor.count() is gone from
            # current pymongo releases.
            srch = list(mongo_search)
            if not srch:
                print("Error: could not find city %s in db." % city)
                continue

            # Drop the first record whose geoid contains the CBSA code
            # (presumably the metro-level summary row -- TODO confirm).
            for i, r in enumerate(srch):
                if cbsa in r['geoid']:
                    del srch[i]
                    break

            df = Table25013(pd.DataFrame(srch))

            #######################
            #
            # renters vs owners

            f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))

            ax1.set_title(city)

            cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
            sns.kdeplot(df['HousePct'], df['RentPct'],
                        shade=True, cmap=cmap, ax=ax1)
            ax1.set_xlim([0, 1])
            ax1.set_ylim([0, 1])

            #######################
            #
            # Placeholder: second figure and palette are created but nothing
            # is drawn on them yet.

            f, ax1 = plt.subplots(1, 2, sharex=True, figsize=(12, 6))

            sns.cubehelix_palette(8, start=.8, rot=-.55)
    finally:
        client.close()

    plt.show()
    plt.draw()
||||
|
||||
|
||||
|
||||
|
||||
# Run the plots only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
||||
|
@ -1,250 +0,0 @@
@@ -1,250 +0,0 @@
|
||||
from pymongo import MongoClient |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
import matplotlib.pyplot as plt |
||||
import seaborn as sns |
||||
|
||||
import statsmodels.api as sm |
||||
from scipy import stats |
||||
|
||||
import metro as mt |
||||
|
||||
|
||||
# When true, do_17001 iterates over the cities and builds its KDE figures.
do_univariate_kde = True

# Metro areas to process; the other candidates are currently switched off.
cities = [
    'Seattle',
    # 'Houston', 'Los Angeles', 'New York', 'Chicago',
]
||||
|
||||
|
||||
def main():
    """Script entry point: run the B25070 plotting routine."""
    do_25070()
||||
|
||||
|
||||
def do_25070():
    """Plot KDEs of the table B25070 rent brackets for every metro in ``cities``.

    For each city the function looks up its CBSA code, loads the matching
    records from the ``metros`` Mongo database, derives the
    ``Rent_<bracket>_Pct`` columns via ``Table25070`` and overlays one KDE
    per bracket on a single axis limited to [0, 1].

    Cities that are missing from the database, or that lack table B25070,
    are reported on stdout and skipped.
    """
    from Table25070 import Table25070

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    # Bracket suffixes used to build the Rent_<bracket>_Pct column names.
    labels = ["0-10", "10-15", "15-20", "20-25", "25-30",
              "30-35", "35-40", "40-50", "50+"]

    # Mongo
    client = MongoClient()
    try:
        db = client['metros']
        metaprops = db['PropertiesMeta']
        props = db['Properties']

        for city in cities:

            #########################
            # Mongo lookup

            cbsa = mt.CBSACode(city)[0]

            pre_search = metaprops.find_one({'geoid': cbsa})

            mongo_search = props.find({'$and': [
                {'metroid': cbsa},
                {'geoid': {'$nin': [cbsa]}},
            ]})

            # find_one() returns None when the metro has no metadata record.
            tables = pre_search.get('tables', ()) if pre_search else ()
            if 'B25070' not in tables:
                print("Error: could not find table B25070 for city %s in db." % city)
                continue

            # Materialise the cursor once; Cursor.count() is gone from
            # current pymongo releases.
            srch = list(mongo_search)
            if not srch:
                print("Error: could not find city %s in db." % city)
                continue

            # Drop the first record whose geoid contains the CBSA code
            # (presumably the metro-level summary row -- TODO confirm).
            for i, r in enumerate(srch):
                if cbsa in r['geoid']:
                    del srch[i]
                    break

            df = Table25070(pd.DataFrame(srch))

            #######################
            # KDE per rent bracket, all on one axis

            f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))

            ax1.set_title(city)

            colpal = sns.color_palette("GnBu_d", len(labels))

            for i, lab in enumerate(labels):
                tlab = "Rent_" + lab + "_Pct"
                sns.kdeplot(df[tlab], shade=True, color=colpal[i], ax=ax1)

            ax1.set_xlim([0.0, 1.0])
    finally:
        client.close()

    plt.show()
    plt.draw()
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def do_17001():
    """Plot table B17001 (poverty) KDEs for every metro in ``cities``.

    Nothing is plotted unless the module flag ``do_univariate_kde`` is true.
    For each city the function looks up its CBSA code, loads the matching
    records from the ``metros`` Mongo database, turns them into a DataFrame
    via ``Table17001`` and builds three figures (everyone, pre-adolescents,
    seniors), each with the overall poverty share on top and the
    male/female shares below.

    Cities that are missing from the database, or that lack table B17001,
    are reported on stdout and skipped.
    """
    from Table17001 import Table17001

    # Seaborn
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    c1, c2, c3 = sns.color_palette("Set1", 3)

    # (overall column, male column, female column) for each KDE figure.
    # The juvenile panel (JuvP_Pct / JuvM_P_PctM / JuvF_P_PctF) is
    # intentionally left out; it was disabled in the original script.
    kde_panels = [
        ('P_Pct', 'M_P_PctM', 'F_P_PctF'),
        ('PAP_Pct', 'PAM_P_PctM', 'PAF_P_PctF'),
        ('SrP_Pct', 'SrM_P_PctM', 'SrF_P_PctF'),
    ]

    # Mongo
    client = MongoClient()
    try:
        db = client['metros']
        metaprops = db['PropertiesMeta']
        props = db['Properties']

        if do_univariate_kde:

            for city in cities:

                #########################
                # Mongo lookup

                cbsa = mt.CBSACode(city)[0]

                pre_search = metaprops.find_one({'geoid': cbsa})

                mongo_search = props.find({'$and': [
                    {'metroid': cbsa},
                    {'geoid': {'$nin': [cbsa]}},
                ]})

                # find_one() returns None when the metro has no metadata
                # record.
                tables = pre_search.get('tables', ()) if pre_search else ()
                if 'B17001' not in tables:
                    print("Error: could not find table B17001 for city %s in db." % city)
                    continue

                # Materialise the cursor once; Cursor.count() is gone from
                # current pymongo releases.
                srch = list(mongo_search)
                if not srch:
                    print("Error: could not find city %s in db." % city)
                    continue

                # Drop the first record whose geoid contains the CBSA code
                # (presumably the metro-level summary row -- TODO confirm).
                for i, r in enumerate(srch):
                    if cbsa in r['geoid']:
                        del srch[i]
                        break

                df = Table17001(pd.DataFrame(srch))

                #######################
                # KDE plots:
                # share of people in poverty overall (top) and by gender
                # (bottom) for everyone, pre-adolescents and seniors.

                for total_code, male_code, female_code in kde_panels:
                    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True,
                                                 figsize=(8, 8))
                    sns.kdeplot(df[total_code], shade=True, color=c3, ax=ax1)
                    sns.kdeplot(df[male_code], shade=True, color=c2, ax=ax2)
                    sns.kdeplot(df[female_code], shade=True, color=c1, ax=ax2)

                    ax1.set_xlim([0, 1])
                    ax2.set_xlim([0, 1])
    finally:
        client.close()

    plt.show()
    plt.draw()
||||
|
||||
# Run the plots only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@ -0,0 +1,66 @@
@@ -0,0 +1,66 @@
|
||||
from pymongo import MongoClient |
||||
|
||||
import numpy as np |
||||
import pandas as pd |
||||
|
||||
|
||||
|
||||
def Table25013(df):
    """Add occupancy totals and owner/renter shares derived from table B25013.

    B25013 : Tenure by Educational Attainment of Householder
    B25013001 : Total:
    B25013002 : Owner-occupied housing units:
    B25013003 : Less than high school graduate
    B25013004 : High school graduate (including equivalency)
    B25013005 : Some college or associate's degree
    B25013006 : Bachelor's degree or higher
    B25013007 : Renter-occupied housing units:
    B25013008 : Less than high school graduate
    B25013009 : High school graduate (including equivalency)
    B25013010 : Some college or associate's degree
    B25013011 : Bachelor's degree or higher
    B25013universe : Occupied Housing Units
    B25013yr : 2013

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``B25013001``, ``B25013002`` and
        ``B25013007``.  The five derived columns are added to this frame
        in place.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the columns ``TotalPop``, ``HousePop``,
        ``RentPop``, ``HousePct`` and ``RentPct`` and with every NaN
        replaced by 0.  Rows whose total is 0 get a share of 0.
    """
    # tenure by educ attainment
    total_fields = np.array(['B25013001',
                             'B25013002',
                             'B25013007'])

    # Per-education field map (not consumed yet).
    # col1: house
    # col2: rent
    # The rent column previously listed B25070xxx codes, which belong to a
    # different table; per the field listing above they are B25013008-011.
    table_fields = np.array([['B25013003', 'B25013008'],  # Less than high school graduate
                             ['B25013004', 'B25013009'],  # High school graduate (including equivalency)
                             ['B25013005', 'B25013010'],  # Some college or associate's degree
                             ['B25013006', 'B25013011'],  # Bachelor's degree or higher
                             ])

    tot_pop = df[total_fields[0]]
    house_pop = df[total_fields[1]]
    rent_pop = df[total_fields[2]]

    df['TotalPop'] = tot_pop
    df['HousePop'] = house_pop
    df['RentPop'] = rent_pop

    # Float denominator: guarantees true division even for integer columns
    # under Python 2.  Zero totals become NaN so the share ends up as NaN
    # (filled with 0 below) instead of inf.
    safe_total = tot_pop.astype(float).replace(0, np.nan)

    df['HousePct'] = house_pop / safe_total
    df['RentPct'] = rent_pop / safe_total

    df = df.fillna(0)

    return df
||||
|
||||
|
Loading…
Reference in new issue