Break the plotting code out into separate per-table Analyze scripts, for better organization.

10 years ago · 5d4c449394
7 changed files with 568 additions and 453 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
				@@ -1,3 +1,5 @@
+*.file
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
--- a/analysis/OneCity_Scatter.py
+++ b/analysis/OneCity_Scatter.py
@ -12,10 +12,7 @@ from scipy import stats
				@@ -12,10 +12,7 @@ from scipy import stats
 import metro as mt


-
-do_univariate_kde = True
-
-cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
+cities = ['Seattle']


 def main():
@ -30,7 +27,6 @@ def do_15002():
				@@ -30,7 +27,6 @@ def do_15002():
    # Seaborn 
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
-    c1, c2, c3 = sns.color_palette("Set1", 3)
    
    # Mongo
    client = MongoClient()
@ -72,8 +68,72 @@ def do_15002():
				@@ -72,8 +68,72 @@ def do_15002():
        df = Table15002(df)


+
+        colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
+        xcp = sns.xkcd_palette(colors)
+
+
+
+
+
+
+
+        #######################
+        # Bar plot:
+        #
+        # pct of population in each ed category
+    
+        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(8, 8))
+    
+
+        # ------------------------
+        # Subplot 1
+    
+        ax1.set_title(cities[0])
+    
+        #ddm = {}
+        #ddf = {}
+        ddt = {}
+        for i in range(1,5+1):
+            #ddm[str(i)] = df["M_EdCat%d_Pct"%(i)].values
+            #ddf[str(i)] = df["F_EdCat%d_Pct"%(i)].values
+            ddt[str(i)] = df["EdCat%d_Pct"%(i)].values
+
+
+        ######################
+        ######################
+        ### Wrong!!!
+        ### yy is a vector
+        ### this only plots 
+        ### the first element
+        ######################
+        ######################
+
+
+        xx = np.array(ddt.keys())
+        yy = np.array(ddt.values())
+        sns.barplot(xx,yy,palette=xcp,ax=ax1)
+
+        #xx = np.array(ddm.keys())
+        #yy = np.array(ddm.values())
+        #sns.barplot(xx,yy,palette=xcp,ax=ax2)
+
+        #xx = np.array(ddf.keys())
+        #yy = np.array(ddf.values())
+        #sns.barplot(xx,yy,palette=xcp,ax=ax3)
+
+        plt.show()
+        plt.draw()
+
+
+
+
+
        #######################
        # Scatter plot:
+        #
+        # average vs variance 
+        # average vs gender imbalance
    
        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
    
@ -134,6 +194,9 @@ def do_15002():
				@@ -134,6 +194,9 @@ def do_15002():

        #######################
        # Scatter plot:
+        # 
+        # variances vs averages
+        # variances vs gender imbalance
    
        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
    
@ -186,131 +249,28 @@ def do_15002():
				@@ -186,131 +249,28 @@ def do_15002():
        sns.regplot(df[xcode],df[ycode], color=c3, ax=ax2, fit_reg=False)


+
+
+
+
+
+
    plt.draw()
    plt.show()



-def do_17001():


-    from Table17001 import Table17001
-    
-    
-    # Seaborn 
-    sns.set_palette("deep", desat=.6)
-    sns.set_context(rc={"figure.figsize": (8, 4)})
-    c1, c2, c3 = sns.color_palette("Set1", 3)
-    
-    # Mongo
-    client = MongoClient()
-    db = client['metros']
-    metaprops = db['PropertiesMeta']
-    props = db['Properties']
-        
-    
-    for city in cities:
-    
-        #########################
-        # Mongo lookup
-    
-        cbsa = mt.CBSACode(city)[0]
-    
-        pre_search = metaprops.find_one({'geoid':cbsa})
-    
-        mongo_search = props.find({'$and': [
-                                            {'metroid':cbsa},
-                                            {'geoid':{'$nin':[cbsa]}}
-                                            ]
-                                })
-    
-        if 'B17001' not in pre_search['tables']:
-            print "Error: could not find table B17001 for city",city,"in db."
-            continue
-    
-        if mongo_search.count()==0:
-            print "Error: could not find city",city,"in db."
-            continue
-    
-    
-        df = pd.DataFrame([])
-        srch = list(mongo_search)
-        for i,r in enumerate(srch):
-            if cbsa in r['geoid']:
-                del srch[i]
-                break
-        df = df.append(srch)
-        df = Table17001(df)
-    
-    
-        #######################
-        # Scatter plots:
-    
-        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
-    
-        # ------------------------
-        # Subplot 1
-    
-        ax1.set_title(cities[0])
-    
-        #xcode = 'B17001003'
-        #xlabel = metaprops.find_one({'code':xcode})['name']
-        #ycode = 'B17001031'
-        #ylabel = metaprops.find_one({'code':ycode})['name']
-    
-        xcode = 'TotalPop'
-        xlabel=xcode
-        ycode = 'M_P_PctM'
-        ylabel=ycode
-    
-        sns.regplot(df[xcode],df[ycode], color=c2, ax=ax1, fit_reg=False)
-    
-        xcode = 'TotalPop'
-        xlabel=xcode
-        ycode = 'F_P_PctF'
-        ylabel=ycode
-    
-        sns.regplot(df[xcode],df[ycode], color=c1, ax=ax1, fit_reg=False)
-    
-        xlim = (min(df[xcode]),
-                max(df[xcode]))
-        ylim = (min(df[ycode]),
-                max(df[ycode]))
-    
-        ax1.set_xlim(xlim)
-        ax1.set_ylim(ylim)
-    
-        ax1.set_xlabel(xlabel)
-        ax1.set_ylabel('Pct of M/F Pop in Pov')
-    
-        # ------------------------
-        # Subplot 2
-    
-    
-        xcode = 'TotalPop'
-        xlabel=xcode
-        ycode = 'SrM_P_PctM'
-        ylabel=ycode
-        sns.regplot(df[xcode],df[ycode], color=c2, ax=ax2, fit_reg=False)
-        #sns.regplot(df[xcode],df[ycode] - ((df['TotalPop']*df[ycode])/df['TotalPop']), color=c1, ax=ax2, fit_reg=False)
-    
-        xcode = 'TotalPop'
-        xlabel=xcode
-        ycode = 'SrF_P_PctF'
-        ylabel=ycode
-        sns.regplot(df[xcode],df[ycode], color=c1, ax=ax2, fit_reg=False)
-        #sns.regplot(df[xcode],abs(df[ycode] - df[ycode].mean()), color=c1, ax=ax2, fit_reg=False)
-    
-        xlim = (min(df[xcode]),
-                max(df[xcode]))
-        ylim = (min(df[ycode]),
-                max(df[ycode]))
-    
-        ax2.set_xlim(xlim)
-        ax2.set_ylim(ylim)
-    
-        ax2.set_xlabel(xlabel)
-        #ax2.set_ylabel('Pct of M/F Pre-Adol. Pop in Pov')
+
+
+
+
+
+
+
+
+

    plt.draw()
    plt.show()
--- a/analysis/Analyze17001.py
+++ b/analysis/Analyze17001.py
@ -0,0 +1,251 @@
				@@ -0,0 +1,251 @@
+from pymongo import MongoClient
+
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+import statsmodels.api as sm
+from scipy import stats
+
+import metro as mt
+
+
+
+do_univariate_kde = True
+
+cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
+
+
+def main():
+    """Script entry point: run the B17001 analysis via do_17001()."""
+    do_17001()
+
+
+
+
+def do_17001():
+    """
+    Plot table B17001 (poverty) statistics for every city in ``cities``.
+
+    For each city the records whose ``metroid`` equals the city's CBSA code
+    (and whose ``geoid`` is not the CBSA code itself) are read from the
+    ``metros`` MongoDB database, passed through ``Table17001`` and drawn as
+    four figures:
+
+      1. scatter: total population vs. male/female poverty rate (top) and
+         male/female senior poverty rate (bottom)
+      2. KDE: poverty rate overall and by gender
+      3. KDE: pre-adolescent poverty rate overall and by gender
+      4. KDE: senior poverty rate overall and by gender
+
+    A city with no B17001 entry in ``PropertiesMeta``, or with no matching
+    records in ``Properties``, is reported on stdout and skipped.
+    """
+
+    from Table17001 import Table17001
+
+    # Seaborn
+    sns.set_palette("deep", desat=.6)
+    sns.set_context(rc={"figure.figsize": (8, 4)})
+    c1, c2, c3 = sns.color_palette("Set1", 3)
+
+    # Mongo
+    client = MongoClient()
+    db = client['metros']
+    metaprops = db['PropertiesMeta']
+    props = db['Properties']
+
+    for city in cities:
+
+        #########################
+        # Mongo lookup
+
+        cbsa = mt.CBSACode(city)[0]
+
+        pre_search = metaprops.find_one({'geoid':cbsa})
+
+        mongo_search = props.find({'$and': [
+                                            {'metroid':cbsa},
+                                            {'geoid':{'$nin':[cbsa]}}
+                                            ]
+                                })
+
+        # find_one() returns None when no metadata document matches, so
+        # guard before indexing into it.
+        if pre_search is None or 'B17001' not in pre_search['tables']:
+            print "Error: could not find table B17001 for city",city,"in db."
+            continue
+
+        if mongo_search.count()==0:
+            print "Error: could not find city",city,"in db."
+            continue
+
+        df = pd.DataFrame([])
+        srch = list(mongo_search)
+        # Drop the first record (if any) whose geoid contains the CBSA code.
+        for i,r in enumerate(srch):
+            if cbsa in r['geoid']:
+                del srch[i]
+                break
+        df = df.append(srch)
+        df = Table17001(df)
+
+
+        #######################
+        # Scatter plots:
+        #
+        # Total population vs male/female poverty rate
+        # Total population vs male/female senior poverty rate
+
+        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
+
+        # ------------------------
+        # Subplot 1
+
+        # Title with the city being plotted (was always cities[0]).
+        ax1.set_title(city)
+
+        #xcode = 'B17001003'
+        #xlabel = metaprops.find_one({'code':xcode})['name']
+        #ycode = 'B17001031'
+        #ylabel = metaprops.find_one({'code':ycode})['name']
+
+        xcode = 'TotalPop'
+        xlabel=xcode
+        ycode = 'M_P_PctM'
+        ylabel=ycode
+
+        sns.regplot(df[xcode],df[ycode], color=c2, ax=ax1, fit_reg=False)
+
+        xcode = 'TotalPop'
+        xlabel=xcode
+        ycode = 'F_P_PctF'
+        ylabel=ycode
+
+        sns.regplot(df[xcode],df[ycode], color=c1, ax=ax1, fit_reg=False)
+
+        # NOTE(review): the limits below come from the female series only,
+        # so male points outside that range are clipped -- confirm intent.
+        xlim = (min(df[xcode]),
+                max(df[xcode]))
+        ylim = (min(df[ycode]),
+                max(df[ycode]))
+
+        ax1.set_xlim(xlim)
+        ax1.set_ylim(ylim)
+
+        ax1.set_xlabel(xlabel)
+        ax1.set_ylabel('Pct of M/F Pop in Pov')
+
+        # ------------------------
+        # Subplot 2
+
+        xcode = 'TotalPop'
+        xlabel=xcode
+        ycode = 'SrM_P_PctM'
+        ylabel=ycode
+        sns.regplot(df[xcode],df[ycode], color=c2, ax=ax2, fit_reg=False)
+        #sns.regplot(df[xcode],df[ycode] - ((df['TotalPop']*df[ycode])/df['TotalPop']), color=c1, ax=ax2, fit_reg=False)
+
+        xcode = 'TotalPop'
+        xlabel=xcode
+        ycode = 'SrF_P_PctF'
+        ylabel=ycode
+        sns.regplot(df[xcode],df[ycode], color=c1, ax=ax2, fit_reg=False)
+        #sns.regplot(df[xcode],abs(df[ycode] - df[ycode].mean()), color=c1, ax=ax2, fit_reg=False)
+
+        # NOTE(review): as above, limits follow the female senior series only.
+        xlim = (min(df[xcode]),
+                max(df[xcode]))
+        ylim = (min(df[ycode]),
+                max(df[ycode]))
+
+        ax2.set_xlim(xlim)
+        ax2.set_ylim(ylim)
+
+        ax2.set_xlabel(xlabel)
+        ax2.set_ylabel('Pct M/F Seniors in Poverty')
+
+
+        #######################
+        # Plot:
+        # Percent of people in poverty
+        # overall, and by gender
+
+        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
+        #ax1.set_title(cities[0])
+        sns.kdeplot(df['P_Pct'],    shade=True, color=c3, ax=ax1)
+        sns.kdeplot(df['M_P_PctM'], shade=True, color=c2, ax=ax2)
+        sns.kdeplot(df['F_P_PctF'], shade=True, color=c1, ax=ax2)
+
+        ax1.set_xlim([0,1])
+        ax2.set_xlim([0,1])
+        #
+        #ax1.set_ylim([0,ylim])
+        #ax2.set_ylim([0,ylim])
+
+
+        #######################
+        # Plot:
+        # Pre-adolescent poverty levels
+        # (overall and by gender)
+
+        # Only referenced by the commented-out set_ylim calls below.
+        xlim = 0.50
+        ylim = 5
+
+        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
+        #ax1.set_title(cities[0] + " " + cities[1] + "Pre-Adolesc")
+        sns.kdeplot( df['PAP_Pct'],    shade=True, color=c3, ax=ax1)
+        #sns.kdeplot(df2['PAP_Pct'],   shade=True, color=c2, ax=ax1)
+        sns.kdeplot( df['PAM_P_PctM'], shade=True, color=c2, ax=ax2)
+        sns.kdeplot( df['PAF_P_PctF'], shade=True, color=c1, ax=ax2)
+
+        ax1.set_xlim([0,1])
+        ax2.set_xlim([0,1])
+        #
+        #ax1.set_ylim([0,ylim])
+        #ax2.set_ylim([0,ylim])
+
+        #f.savefig("onecity_univariate_"+city+"_2.jpg")
+
+
+        ### #######################
+        ### # Plot:
+        ### # Juvenile poverty levels
+        ### # (overall and by gender)
+
+        ### f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
+        ### #ax1.set_title(cities[0] + " " + cities[1] + "Juveniles")
+        ### sns.kdeplot( df['JuvP_Pct'],    shade=True, color=c3, ax=ax1)
+        ### #sns.kdeplot(df2['JuvP_Pct'],   shade=True, color=c2, ax=ax1)
+        ### sns.kdeplot( df['JuvM_P_PctM'], shade=True, color=c2, ax=ax2)
+        ### sns.kdeplot( df['JuvF_P_PctF'], shade=True, color=c1, ax=ax2);
+        ###
+        ### ax1.set_xlim([0,1])
+        ### ax2.set_xlim([0,1])
+        ### #
+        ### #ax1.set_ylim([0,ylim])
+        ### #ax2.set_ylim([0,ylim])
+        ###
+        ### #f.savefig("onecity_univariate_"+city+"_3.jpg")
+
+
+        #######################
+        # Plot:
+        # Senior poverty levels
+        # (overall and by gender)
+
+        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
+        #ax1.set_title(cities[0] + " " + cities[1] + "Seniors")
+        sns.kdeplot( df['SrP_Pct'],    shade=True, color=c3, ax=ax1)
+        #sns.kdeplot(df2['SrP_Pct'],    shade=True, color=c2, ax=ax1)
+        sns.kdeplot( df['SrM_P_PctM'], shade=True, color=c2, ax=ax2)
+        sns.kdeplot( df['SrF_P_PctF'], shade=True, color=c1, ax=ax2)
+
+        ax1.set_xlim([0,1])
+        ax2.set_xlim([0,1])
+        #
+        #ax1.set_ylim([0,ylim])
+        #ax2.set_ylim([0,ylim])
+
+        #f.savefig(cities[0]+"_g2_3.jpg")
+
+        # Show only after every figure for this city has been created.
+        # Previously show() ran right after the scatter figure, so the
+        # KDE figures built afterwards were never displayed.
+        plt.draw()
+        plt.show()
+    
+
+
+
+
+
+if __name__=="__main__":
+    main()
+
+
--- a/analysis/Analyze25013.py
+++ b/analysis/Analyze25013.py
@ -0,0 +1,108 @@
				@@ -0,0 +1,108 @@
+from pymongo import MongoClient
+
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+import statsmodels.api as sm
+from scipy import stats
+
+import metro as mt
+
+
+do_univariate_kde = True
+
+cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
+
+def main():
+    """Script entry point: run the B25013 analysis via do_25013()."""
+    do_25013()
+
+
+
+def do_25013():
+    """
+    Plot table B25013 (tenure) statistics for every city in ``cities``.
+
+    For each city the records whose ``metroid`` equals the city's CBSA code
+    (and whose ``geoid`` is not the CBSA code itself) are read from the
+    ``metros`` MongoDB database and passed through ``Table25013``. The joint
+    density of the ``HousePct`` and ``RentPct`` columns is then drawn.
+    A second, still empty, two-panel figure is also created.
+
+    A city with no B25013 entry in ``PropertiesMeta``, or with no matching
+    records in ``Properties``, is reported on stdout and skipped.
+    """
+
+    from Table25013 import Table25013
+
+    # Seaborn
+    sns.set_palette("deep", desat=.6)
+    sns.set_context(rc={"figure.figsize": (8, 4)})
+
+    # Mongo
+    client = MongoClient()
+    db = client['metros']
+    metaprops = db['PropertiesMeta']
+    props = db['Properties']
+
+    for city in cities:
+
+        #########################
+        # Mongo lookup
+
+        cbsa = mt.CBSACode(city)[0]
+
+        pre_search = metaprops.find_one({'geoid':cbsa})
+
+        mongo_search = props.find({'$and': [
+                                            {'metroid':cbsa},
+                                            {'geoid':{'$nin':[cbsa]}}
+                                            ]
+                                })
+
+        # This script consumes B25013 columns (see Table25013), so that is
+        # the table to check for (the check was copied from the B25070
+        # script). find_one() returns None when nothing matches, so guard
+        # before indexing into it.
+        if pre_search is None or 'B25013' not in pre_search['tables']:
+            print "Error: could not find table B25013 for city",city,"in db."
+            continue
+
+        if mongo_search.count()==0:
+            print "Error: could not find city",city,"in db."
+            continue
+
+        df = pd.DataFrame([])
+        srch = list(mongo_search)
+        # Drop the first record (if any) whose geoid contains the CBSA code.
+        for i,r in enumerate(srch):
+            if cbsa in r['geoid']:
+                del srch[i]
+                break
+        df = df.append(srch)
+        df = Table25013(df)
+
+
+        #######################
+        #
+        # renters vs owners
+
+        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
+
+        # ------------------------
+        # Subplot 1
+
+        # Title with the city being plotted (was always cities[0]).
+        ax1.set_title(city)
+
+        cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
+        sns.kdeplot(df['HousePct'],df['RentPct'],shade=True,cmap=cmap,ax=ax1)
+        ax1.set_xlim([0,1])
+        ax1.set_ylim([0,1])
+
+
+        #######################
+        #
+        # TODO: placeholder figure -- nothing is drawn on it yet, and the
+        # palette created below is discarded.
+
+        f, ax1 = plt.subplots(1, 2, sharex=True, figsize=(12, 6))
+
+        sns.cubehelix_palette(8, start=.8, rot=-.55)
+
+
+        plt.show()
+        plt.draw()
+
+
+
+
+if __name__=="__main__":
+    main()
+
--- a/analysis/Analyze25070.py
+++ b/analysis/Analyze25070.py
@ -89,7 +89,6 @@ def do_25070():
				@@ -89,7 +89,6 @@ def do_25070():

        xx = np.array(ddt.keys())
        yy = np.array(ddt.values())
-        import pdb; pdb.set_trace()
        sns.barplot(xx,yy,palette="Set1",ax=ax1)

        #xx = np.array(ddm.keys())
@ -105,111 +104,91 @@ def do_25070():
				@@ -105,111 +104,91 @@ def do_25070():



+        #######################
+        # univariate KDE
+        # 
+        # density of (rent as pct of income) categories
+    
+        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
+    
+        # ------------------------
+        # Subplot 1
+    
+        ax1.set_title(cities[0])

+        labels = ["0-10","10-15","15-20","20-25","25-30","30-35","35-40","40-50","50+"]
+        colpal = sns.color_palette("GnBu_d",len(labels))

-def do_15002():
-
-    from Table15002 import Table15002
+        ddt = {}
+        for i,lab in enumerate(labels):
+            tlab = "Rent_"+lab+"_Pct"
+            sns.kdeplot(df[tlab], shade=True, color=colpal[i], ax=ax1)
+        ax1.set_xlim([0.001,1.0])

-    # Seaborn 
-    sns.set_palette("deep", desat=.6)
-    sns.set_context(rc={"figure.figsize": (8, 4)})
-    
-    # Mongo
-    client = MongoClient()
-    db = client['metros']
-    metaprops = db['PropertiesMeta']
-    props = db['Properties']

-    for city in cities:

-        #########################
-        # Mongo lookup
-    
-        cbsa = mt.CBSACode(city)[0]
-    
-        pre_search = metaprops.find_one({'geoid':cbsa})
-    
-        mongo_search = props.find({'$and': [
-                                            {'metroid':cbsa},
-                                            {'geoid':{'$nin':[cbsa]}}
-                                            ]
-                                })

-        if 'B15002' not in pre_search['tables']:
-            print "Error: could not find table B15002 for city",city,"in db."
-            continue
-    
-        if mongo_search.count()==0:
-            print "Error: could not find city",city,"in db."
-            continue
-    
+        #######################
+        # joint KDE
+        # 
+        # joint density of 
+        # (rent 10-15% of income) 
+        # (rent 40-50% of income)

-        df = pd.DataFrame([])
-        srch = list(mongo_search)
-        for i,r in enumerate(srch):
-            if cbsa in r['geoid']:
-                del srch[i]
-                break
-        df = df.append(srch)
-        df = Table15002(df)
+        #f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))

+        #ax1.set_title(cities[0])

+        #cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
+        #tlab1 = "Rent_10-15_Pct"
+        #tlab2 = "Rent_40-50_Pct"
+        #sns.kdeplot(df[tlab1], df[tlab2], shade=True, cmap=cmap, ax=ax1)

-        colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
-        xcp = sns.xkcd_palette(colors)



        #######################
-        # Bar plot:
-    
-        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(8, 8))
-    
+        # pair grid 
+        # multivariate whiz-bang plot
+        # 
+        # joint scatterplots
+        # univariate kdes 
+        # joint kdes
+        # (x/y limits default to -3, +3)

-        # ------------------------
-        # Subplot 1
-    
-        ax1.set_title(cities[0])
-    
-        #ddm = {}
-        #ddf = {}
-        ddt = {}
-        for i in range(1,5+1):
-            #ddm[str(i)] = df["M_EdCat%d_Pct"%(i)].values
-            #ddf[str(i)] = df["F_EdCat%d_Pct"%(i)].values
-            ddt[str(i)] = df["EdCat%d_Pct"%(i)].values
+        #columnlabels = ["Rent_%s_Pct"%(lab) for lab in labels]
+        #cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
+        #g = sns.PairGrid(df[columnlabels], diag_sharey=False)
+        #g.map_lower(sns.kdeplot, cmap=cmap)
+        #g.map_upper(plt.scatter)
+        #g.map_diag(sns.kdeplot, lw=2)


-        ######################
-        ######################
-        ### Wrong!!!
-        ### yy is a vector
-        ### this only plots 
-        ### the first element
-        ######################
-        ######################
+
+        ##########################
+        # pair grid
+        # 
+        # grouped to have fewer variables 
+
+        #new_labels = ["0-10","10-20","20-30","30-40","40-50","50+"]
+        #colpal = sns.color_palette("GnBu_d",len(new_labels))
+        #columnlabels = ["NewRent_%s_Pct"%(lab) for lab in new_labels]
+        #cmap = sns.cubehelix_palette(8, start=0.5, rot=-.75, as_cmap=True)
+        #g = sns.PairGrid(df[columnlabels], diag_sharey=False)
+        #g.map_lower(sns.kdeplot, cmap=cmap)
+        #g.map_upper(plt.scatter)
+        #g.map_diag(sns.kdeplot, lw=2)
+


-        xx = np.array(ddt.keys())
-        yy = np.array(ddt.values())
-        sns.barplot(xx,yy,palette=xcp,ax=ax1)

-        #xx = np.array(ddm.keys())
-        #yy = np.array(ddm.values())
-        #sns.barplot(xx,yy,palette=xcp,ax=ax2)

-        #xx = np.array(ddf.keys())
-        #yy = np.array(ddf.values())
-        #sns.barplot(xx,yy,palette=xcp,ax=ax3)

        plt.show()
        plt.draw()



-    plt.draw()
-    plt.show()



@ -219,4 +198,3 @@ def do_15002():
				@@ -219,4 +198,3 @@ def do_15002():
 if __name__=="__main__":
    main()

-
--- a/analysis/OneCity_Univariate.py
+++ b/analysis/OneCity_Univariate.py
@ -1,250 +0,0 @@
				@@ -1,250 +0,0 @@
-from pymongo import MongoClient
-
-import numpy as np
-import pandas as pd
-
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-import statsmodels.api as sm
-from scipy import stats
-
-import metro as mt
-
-
-do_univariate_kde = True
-
-cities = ['Seattle']#,'Houston','Los Angeles','New York','Chicago']
-
-
-def main():
-    do_25070()
-
-
-def do_25070():
-
-    from Table25070 import Table25070
-
-    # Seaborn 
-    sns.set_palette("deep", desat=.6)
-    sns.set_context(rc={"figure.figsize": (8, 4)})
-
-    # Mongo
-    client = MongoClient()
-    db = client['metros']
-    metaprops = db['PropertiesMeta']
-    props = db['Properties']
-
-    for city in cities:
-
-        #########################
-        # Mongo lookup
-    
-        cbsa = mt.CBSACode(city)[0]
-    
-        pre_search = metaprops.find_one({'geoid':cbsa})
-    
-        mongo_search = props.find({'$and': [
-                                            {'metroid':cbsa},
-                                            {'geoid':{'$nin':[cbsa]}}
-                                            ]
-                                })
-
-        if 'B25070' not in pre_search['tables']:
-            print "Error: could not find table B25070 for city",city,"in db."
-            continue
-    
-        if mongo_search.count()==0:
-            print "Error: could not find city",city,"in db."
-            continue
-    
-
-        df = pd.DataFrame([])
-        srch = list(mongo_search)
-        for i,r in enumerate(srch):
-            if cbsa in r['geoid']:
-                del srch[i]
-                break
-        df = df.append(srch)
-        df = Table25070(df)
-
-
-
-        #######################
-        # Bar plot:
-    
-        f, ax1 = plt.subplots(1, 1, sharex=True, figsize=(12, 6))
-    
-
-        # ------------------------
-        # Subplot 1
-    
-        ax1.set_title(cities[0])
-
-        labels = ["0-10","10-15","15-20","20-25","25-30","30-35","35-40","40-50","50+"]
-        colpal = sns.color_palette("GnBu_d",len(labels))
-
-        ddt = {}
-        for i,lab in enumerate(labels):
-            tlab = "Rent_"+lab+"_Pct"
-            sns.kdeplot(df[tlab], shade=True, color=colpal[i], ax=ax1)
-        ax1.set_xlim([0.0,1.0])
-
-
-        plt.show()
-        plt.draw()
-
-
-
-
-
-
-
-
-
-def do_17001():
-
-    from Table17001 import Table17001
-
-
-    # Seaborn 
-    sns.set_palette("deep", desat=.6)
-    sns.set_context(rc={"figure.figsize": (8, 4)})
-    c1, c2, c3 = sns.color_palette("Set1", 3)
-    
-    # Mongo
-    client = MongoClient()
-    db = client['metros']
-    metaprops = db['PropertiesMeta']
-    props = db['Properties']
-        
-    if do_univariate_kde:
-    
-        for city in cities:
-    
-            #########################
-            # Mongo lookup
-    
-            cbsa = mt.CBSACode(city)[0]
-    
-            pre_search = metaprops.find_one({'geoid':cbsa})
-    
-            mongo_search = props.find({'$and': [
-                                                {'metroid':cbsa},
-                                                {'geoid':{'$nin':[cbsa]}}
-                                                ]
-                                    })
-    
-            if 'B17001' not in pre_search['tables']:
-                print "Error: could not find table B17001 for city",city,"in db."
-                continue
-    
-            if mongo_search.count()==0:
-                print "Error: could not find city",city,"in db."
-                continue
-    
-    
-            df = pd.DataFrame([])
-            srch = list(mongo_search)
-            for i,r in enumerate(srch):
-                if cbsa in r['geoid']:
-                    del srch[i]
-                    break
-            df = df.append(srch)
-            df = Table17001(df)
-    
-    
-            #######################
-            # Plot:
-            # Percent of people in poverty
-            # overall, and by gender
-            
-            f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
-            #ax1.set_title(cities[0])
-            sns.kdeplot(df['P_Pct'],    shade=True, color=c3, ax=ax1)
-            sns.kdeplot(df['M_P_PctM'], shade=True, color=c2, ax=ax2)
-            sns.kdeplot(df['F_P_PctF'], shade=True, color=c1, ax=ax2);
-            
-            ax1.set_xlim([0,1])
-            ax2.set_xlim([0,1])
-            #
-            #ax1.set_ylim([0,ylim])
-            #ax2.set_ylim([0,ylim])
-            
-            #f.savefig("onecity_univariate_"+city+"_1.jpg")
-    
-    
-    
-    
-            #######################
-            # Plot:
-            # Pre-adolescent poverty levels 
-            # (overall and by gender)
-    
-            xlim = 0.50
-            ylim = 5
-    
-            f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
-            #ax1.set_title(cities[0] + " " + cities[1] + "Pre-Adolesc")
-            sns.kdeplot( df['PAP_Pct'],    shade=True, color=c3, ax=ax1)
-            #sns.kdeplot(df2['PAP_Pct'],   shade=True, color=c2, ax=ax1)
-            sns.kdeplot( df['PAM_P_PctM'], shade=True, color=c2, ax=ax2)
-            sns.kdeplot( df['PAF_P_PctF'], shade=True, color=c1, ax=ax2);
-            
-            ax1.set_xlim([0,1])
-            ax2.set_xlim([0,1])
-            #
-            #ax1.set_ylim([0,ylim])
-            #ax2.set_ylim([0,ylim])
-            
-            #f.savefig("onecity_univariate_"+city+"_2.jpg")
-    
-    
-            ### #######################
-            ### # Plot:
-            ### # Juvenile poverty levels 
-            ### # (overall and by gender)
-        
-            ### f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
-            ### #ax1.set_title(cities[0] + " " + cities[1] + "Juveniles")
-            ### sns.kdeplot( df['JuvP_Pct'],    shade=True, color=c3, ax=ax1)
-            ### #sns.kdeplot(df2['JuvP_Pct'],   shade=True, color=c2, ax=ax1)
-            ### sns.kdeplot( df['JuvM_P_PctM'], shade=True, color=c2, ax=ax2)
-            ### sns.kdeplot( df['JuvF_P_PctF'], shade=True, color=c1, ax=ax2);
-            ### 
-            ### ax1.set_xlim([0,1])
-            ### ax2.set_xlim([0,1])
-            ### #
-            ### #ax1.set_ylim([0,ylim])
-            ### #ax2.set_ylim([0,ylim])
-            ### 
-            ### #f.savefig("onecity_univariate_"+city+"_3.jpg")
-        
-        
-    
-            #######################
-            # Plot:
-            # Senior poverty levels 
-            # (overall and by gender)
-    
-            f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
-            #ax1.set_title(cities[0] + " " + cities[1] + "Seniors")
-            sns.kdeplot( df['SrP_Pct'],    shade=True, color=c3, ax=ax1)
-            #sns.kdeplot(df2['SrP_Pct'],    shade=True, color=c2, ax=ax1)
-            sns.kdeplot( df['SrM_P_PctM'], shade=True, color=c2, ax=ax2)
-            sns.kdeplot( df['SrF_P_PctF'], shade=True, color=c1, ax=ax2);
-    
-            ax1.set_xlim([0,1])
-            ax2.set_xlim([0,1])
-            #
-            #ax1.set_ylim([0,ylim])
-            #ax2.set_ylim([0,ylim])
-            
-            #f.savefig(cities[0]+"_g2_3.jpg")
-    
-    
-    plt.show()
-    plt.draw()
-
-if __name__=="__main__":
-    main()
--- a/analysis/Table25013.py
+++ b/analysis/Table25013.py
@ -0,0 +1,66 @@
				@@ -0,0 +1,66 @@
+from pymongo import MongoClient
+
+import numpy as np
+import pandas as pd
+
+
+
+def Table25013(df):
+    """
+    Add owner/renter tenure summary columns computed from table B25013.
+
+    B25013 : Tenure by Educational Attainment of Householder
+        B25013001 : Total:
+            B25013002 : Owner-occupied housing units:
+                B25013003 : Less than high school graduate
+                B25013004 : High school graduate (including equivalency)
+                B25013005 : Some college or associate's degree
+                B25013006 : Bachelor's degree or higher
+            B25013007 : Renter-occupied housing units:
+                B25013008 : Less than high school graduate
+                B25013009 : High school graduate (including equivalency)
+                B25013010 : Some college or associate's degree
+                B25013011 : Bachelor's degree or higher
+    B25013universe : Occupied Housing Units
+    B25013yr : 2013
+
+    The columns TotalPop, HousePop and RentPop (copies of B25013001,
+    B25013002 and B25013007) and HousePct and RentPct (owner and renter
+    counts divided by the total) are added to ``df`` itself.
+
+    Parameters
+    ----------
+    df : DataFrame containing at least the columns B25013001, B25013002
+         and B25013007.
+
+    Returns
+    -------
+    A new DataFrame: ``df`` with the added columns and every NaN
+    replaced by 0.
+    """
+
+    # tenure by educ attainment
+    total_fields = np.array(['B25013001',
+                             'B25013002',
+                             'B25013007'])
+
+    # Field names in table order; not used yet by the calculations below.
+    # The rent column previously listed B25070xxx codes (copied from the
+    # B25070 script); the renter rows of this table are B25013008..011.
+    #
+    #                        col1: house
+    #                               col2: rent
+    table_fields = np.array([['B25013003','B25013008'], # Less than high school graduate
+                             ['B25013004','B25013009'], # High school graduate (including equivalency)
+                             ['B25013005','B25013010'], # Some college or associate's degree
+                             ['B25013006','B25013011']  # Bachelor's degree or higher
+                             ])
+
+    tot_pop   = df[total_fields[0]]
+    house_pop = df[total_fields[1]]
+    rent_pop  = df[total_fields[2]]
+
+    df['TotalPop'] = tot_pop
+    df['HousePop'] = house_pop
+    df['RentPop'] = rent_pop
+
+    df['HousePct'] = house_pop/tot_pop
+    df['RentPct'] = rent_pop/tot_pop
+
+    # Rows with a zero total give 0/0 = NaN above; report those as 0.
+    df = df.fillna(0)
+
+    return df
+
+