In [1]:
# basic libraries
import numpy as np
import pandas as pd
In [2]:
basedata = pd.read_csv('2005_Street_Tree_Census.csv')
/Users/hy/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (37) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [3]:
basedata.head()
Out[3]:
OBJECTID cen_year tree_dbh address tree_loc pit_type soil_lvl status spc_latin spc_common ... state latitude longitude x_sp y_sp objectid_1 census tract bin bbl Location 1
0 592373 2005 6 1139 57 STREET Front Sidewalk Pit Level Good PYRUS CALLERYANA PEAR, CALLERY ... New York 40.632653 -74.000245 984182 169769 0 216.0 3140038.0 3.056890e+09 (40.63265321, -74.00024499)
1 592374 2005 6 2220 BERGEN AVENUE Across Sidewalk Pit Level Good PLATANUS ACERIFOLIA LONDON PLANETREE ... New York 40.620084 -73.901453 1011608 165205 1 706.0 3238037.0 3.084440e+09 (40.62008375, -73.9014528)
2 592375 2005 13 2360 BERGEN AVENUE Front Continuous Pit Level Good ACER PLATANOIDES CRIMSON KING MAPLE, NORWAY-CR KNG ... New York 40.617996 -73.899111 1012259 164445 2 706.0 3238299.0 3.084530e+09 (40.61799567, -73.89911096)
3 592376 2005 13 2254 BERGEN AVENUE Across Sidewalk Pit Level Good PLATANUS ACERIFOLIA LONDON PLANETREE ... New York 40.619694 -73.901003 1011733 165063 3 706.0 3238045.0 3.084440e+09 (40.6196936, -73.90100311)
4 592377 2005 15 2332 BERGEN AVENUE Across Sidewalk Pit Level Good PLATANUS ACERIFOLIA LONDON PLANETREE ... New York 40.618323 -73.899467 1012160 164564 4 706.0 3238294.0 3.084530e+09 (40.61832261, -73.89946707)

5 rows × 54 columns

In [4]:
basedata.shape
Out[4]:
(683788, 45)
In [10]:
basedata = basedata.rename(columns={'OBJECTID':'tree_id', 'boroname':'borough'})
In [11]:
basedata.columns
Out[11]:
Index(['tree_id', 'cen_year', 'tree_dbh', 'address', 'tree_loc', 'pit_type',
       'soil_lvl', 'status', 'spc_latin', 'spc_common', 'vert_other',
       'vert_pgrd', 'vert_tgrd', 'vert_wall', 'horz_blck', 'horz_grate',
       'horz_plant', 'horz_other', 'sidw_crack', 'sidw_raise', 'wire_htap',
       'wire_prime', 'wire_2nd', 'wire_other', 'inf_canopy', 'inf_guard',
       'inf_wires', 'inf_paving', 'inf_outlet', 'inf_shoes', 'inf_lights',
       'inf_other', 'trunk_dmg', 'zipcode', 'zip_city', 'cb_num', 'borocode',
       'borough', 'cncldist', 'st_assem', 'st_senate', 'nta', 'nta_name',
       'boro_ct', 'state', 'latitude', 'longitude', 'x_sp', 'y_sp',
       'objectid_1', 'census tract', 'bin', 'bbl', 'Location 1'],
      dtype='object')
In [12]:
basedata = basedata[['tree_id', 'borough', 'spc_latin', 'spc_common', 'status', 'latitude', 'longitude']]
In [13]:
basedata = basedata.dropna()
In [14]:
basedata.shape
Out[14]:
(592372, 7)
In [15]:
manhattan = basedata[basedata['borough'] == 'Manhattan']
bronx = basedata[basedata['borough'] == 'Bronx']
queens = basedata[basedata['borough'] == 'Queens']
brooklyn = basedata[basedata['borough'] == 'Brooklyn']
staten = basedata[basedata['borough'] == 'Staten Island']
In [16]:
print ('manhattan:  '+str(len(manhattan)))
print ('bronx:  '+str(len(bronx)))
print ('queens:  '+str(len(queens)))
print ('brooklyn:  '+str(len(brooklyn)))
print ('staten:  '+str(len(staten)))
print ('total:  '+str(len(manhattan)+len(bronx)+len(queens)+len(brooklyn)+len(staten)))
manhattan:  49886
bronx:  59925
queens:  240008
brooklyn:  142852
staten:  0
total:  492671
In [17]:
manhattanCount = manhattan.copy()
manhattanCount['count'] = manhattanCount.groupby('spc_common')['tree_id'].transform('count')
manhattanCount = manhattanCount.drop_duplicates('spc_common')
In [18]:
manhattanCount.head()
Out[18]:
tree_id borough spc_latin spc_common status latitude longitude count
17 592390 Manhattan GLEDITSIA TRIACANTHOS HONEYLOCUST Poor 40.728994 -73.984305 11529
18 592391 Manhattan TILIA CORDATA LINDEN, LITTLE LEAF Good 40.729172 -73.984175 3093
20 592393 Manhattan PYRUS CALLERYANA PEAR, CALLERY Excellent 40.708353 -74.012584 7748
25 592398 Manhattan PLATANUS ACERIFOLIA LONDON PLANETREE Good 40.706009 -74.013150 4029
35 592408 Manhattan UNKNOWN UNKNOWN Poor 40.705743 -74.013381 1078
In [19]:
bronxCount = bronx.copy()
bronxCount['count'] = bronxCount.groupby('spc_common')['tree_id'].transform('count')
bronxCount = bronxCount.drop_duplicates('spc_common')
brooklynCount = brooklyn.copy()
brooklynCount['count'] = brooklynCount.groupby('spc_common')['tree_id'].transform('count')
brooklynCount = brooklynCount.drop_duplicates('spc_common')
queensCount = queens.copy()
queensCount['count'] = queensCount.groupby('spc_common')['tree_id'].transform('count')
queensCount = queensCount.drop_duplicates('spc_common')
statenCount = staten.copy()
statenCount['count'] = statenCount.groupby('spc_common')['tree_id'].transform('count')
statenCount = statenCount.drop_duplicates('spc_common')
In [22]:
manhattan.to_csv('manhattan2015.csv')
manhattanCount.to_csv('manhattanCount2005.csv')
bronx.to_csv('bronx2005.csv')
bronxCount.to_csv('bronxCount2005.csv')
brooklyn.to_csv('brooklyn2005.csv')
brooklynCount.to_csv('brooklynCount2005.csv')
queens.to_csv('queens2005.csv')
queensCount.to_csv('queensCount2005.csv')
staten.to_csv('staten2005.csv')
statenCount.to_csv('statenCount2005.csv')
In [ ]: