In [1]:
# basic libraries
import numpy as np
import pandas as pd
In [2]:
# Load the 2015 tree census export; the path is relative to the notebook's
# working directory.
basedata = pd.read_csv(filepath_or_buffer='2015TreeCensus.csv')
In [3]:
# Preview the first five rows of the freshly loaded frame.
basedata.head(n=5)
Out[3]:
tree_id block_id created_at tree_dbh stump_diam curb_loc status health spc_latin spc_common ... boro_ct state latitude longitude x_sp y_sp council district census tract bin bbl
0 180683 348711 08/27/2015 3 0 OnCurb Alive Fair Acer rubrum red maple ... 4073900 New York 40.723092 -73.844215 1027431.148 202756.7687 29.0 739.0 4052307.0 4.022210e+09
1 200540 315986 09/03/2015 21 0 OnCurb Alive Fair Quercus palustris pin oak ... 4097300 New York 40.794111 -73.818679 1034455.701 228644.8374 19.0 973.0 4101931.0 4.044750e+09
2 204026 218365 09/05/2015 3 0 OnCurb Alive Good Gleditsia triacanthos var. inermis honeylocust ... 3044900 New York 40.717581 -73.936608 1001822.831 200716.8913 34.0 449.0 3338310.0 3.028870e+09
3 204337 217969 09/05/2015 10 0 OnCurb Alive Good Gleditsia triacanthos var. inermis honeylocust ... 3044900 New York 40.713537 -73.934456 1002420.358 199244.2531 34.0 449.0 3338342.0 3.029250e+09
4 189565 223043 08/30/2015 21 0 OnCurb Alive Good Tilia americana American linden ... 3016500 New York 40.666778 -73.975979 990913.775 182202.4260 39.0 165.0 3025654.0 3.010850e+09

5 rows × 45 columns

In [4]:
# (rows, columns) of the frame as loaded, before any column or row filtering.
basedata.shape
Out[4]:
(683788, 45)
In [5]:
# Narrow the frame to the seven columns carried through the rest of the
# notebook (identifier, borough, species names, status, coordinates).
basedata = basedata.loc[:, [
    'tree_id',
    'borough',
    'spc_latin',
    'spc_common',
    'status',
    'latitude',
    'longitude',
]]
In [11]:
# Remove every row that has a missing value in ANY remaining column.
# NOTE(review): rows lacking a species name are presumably stumps / dead
# trees, so this likely filters them out as a side effect -- confirm that
# is intended.
basedata = basedata.dropna(axis=0, how='any')
In [12]:
# (rows, columns) after column selection and dropna; compare with the raw
# shape above to see how many rows were discarded.
basedata.shape
Out[12]:
(652169, 7)
In [13]:
# Split the cleaned frame into one subset per borough. The generator yields
# the subsets in the same order as the names on the left-hand side.
manhattan, bronx, queens, brooklyn, staten = (
    basedata.loc[basedata['borough'].eq(borough_name)]
    for borough_name in ('Manhattan', 'Bronx', 'Queens', 'Brooklyn', 'Staten Island')
)
In [14]:
# Report the row count of each borough subset, then their sum. The sum is a
# manual sanity check against the row count of the cleaned frame.
borough_frames = [
    ('manhattan:  ', manhattan),
    ('bronx:  ', bronx),
    ('queens:  ', queens),
    ('brooklyn:  ', brooklyn),
    ('staten:  ', staten),
]
row_counts = [len(frame) for _, frame in borough_frames]
for (label, _), n_rows in zip(borough_frames, row_counts):
    print(label + str(n_rows))
print('total:  ' + str(sum(row_counts)))
manhattan:  62428
bronx:  80584
queens:  237970
brooklyn:  169744
staten:  101443
total:  652169
In [15]:
# Build a per-species summary for Manhattan:
#   1. attach to every row the number of trees sharing its common species name;
#   2. keep only the first row seen for each species.
# The surviving tree_id / status / latitude / longitude values therefore
# describe that single first tree, not the species as a whole.
manhattanCount = (
    manhattan
    .assign(count=manhattan.groupby('spc_common')['tree_id'].transform('count'))
    .drop_duplicates(subset='spc_common')
)
In [17]:
# Preview the first five species rows of the Manhattan summary.
manhattanCount.head(n=5)
Out[17]:
tree_id borough spc_latin spc_common status latitude longitude count
5 190422 Manhattan Gleditsia triacanthos var. inermis honeylocust Alive 40.770046 -73.984950 13176
7 208649 Manhattan Tilia americana American linden Alive 40.762724 -73.987297 1583
22 199760 Manhattan Quercus phellos willow oak Alive 40.782087 -73.980964 889
40 208346 Manhattan Platanus x acerifolia London planetree Alive 40.782587 -73.974840 4122
60 200225 Manhattan Quercus palustris pin oak Alive 40.789936 -73.977212 4584
In [18]:
def _count_by_species(borough_trees):
    """Return one row per common species name with a per-species tree count.

    Parameters
    ----------
    borough_trees : pandas.DataFrame
        Tree rows for a single borough. Must contain the columns
        'spc_common' and 'tree_id'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with an added integer 'count' column (number of
        rows sharing the same 'spc_common'), reduced to the first row
        encountered for each species. All other columns on a surviving row
        (tree_id, status, latitude, longitude, ...) belong to that single
        first tree, not to the species as a whole.
    """
    counted = borough_trees.copy()
    # transform('count') broadcasts the group size back onto every row, so
    # the count is still present after the duplicates are dropped.
    counted['count'] = counted.groupby('spc_common')['tree_id'].transform('count')
    return counted.drop_duplicates('spc_common')


# Same summary for each remaining borough, built by one shared helper instead
# of four pasted copies of the recipe.
bronxCount = _count_by_species(bronx)
brooklynCount = _count_by_species(brooklyn)
queensCount = _count_by_species(queens)
statenCount = _count_by_species(staten)
In [19]:
# (distinct common species names, columns) in the Staten Island summary.
statenCount.shape
Out[19]:
(131, 8)
In [23]:
# Write every borough table and its per-species summary to CSV in the working
# directory. to_csv is left at its defaults, so each frame's index (the
# original row labels from the census frame) is written as the first,
# unnamed column.
csv_exports = [
    (manhattan, 'manhattan2015.csv'),
    (manhattanCount, 'manhattanCount2015.csv'),
    (bronx, 'bronx2015.csv'),
    (bronxCount, 'bronxCount2015.csv'),
    (brooklyn, 'brooklyn2015.csv'),
    (brooklynCount, 'brooklynCount2015.csv'),
    (queens, 'queens2015.csv'),
    (queensCount, 'queensCount2015.csv'),
    (staten, 'staten2015.csv'),
    (statenCount, 'statenCount2015.csv'),
]
for frame, filename in csv_exports:
    frame.to_csv(filename)
In [ ]:
 
In [ ]: