In [1]:
# basic libraries
import numpy as np
import pandas as pd
In [45]:
# Read the census CSV into a DataFrame; the path is relative to the
# notebook's working directory.
censusPath = '2015TreeCensus.csv'
basedata = pd.read_csv(censusPath)
In [46]:
# Show the (rows, columns) shape of the frame as loaded.
basedata.shape
Out[46]:
(683788, 45)
In [47]:
# List the column labels available in the loaded frame.
basedata.columns
Out[47]:
Index(['tree_id', 'block_id', 'created_at', 'tree_dbh', 'stump_diam',
       'curb_loc', 'status', 'health', 'spc_latin', 'spc_common', 'steward',
       'guards', 'sidewalk', 'user_type', 'problems', 'root_stone',
       'root_grate', 'root_other', 'trunk_wire', 'trnk_light', 'trnk_other',
       'brch_light', 'brch_shoe', 'brch_other', 'address', 'postcode',
       'zip_city', 'community board', 'borocode', 'borough', 'cncldist',
       'st_assem', 'st_senate', 'nta', 'nta_name', 'boro_ct', 'state',
       'latitude', 'longitude', 'x_sp', 'y_sp', 'council district',
       'census tract', 'bin', 'bbl'],
      dtype='object')
In [48]:
# Narrow the frame to the six columns used by the rest of the notebook:
# tree identifier, borough, species (latin + common), health and status.
keptColumns = ['tree_id', 'borough', 'spc_latin', 'spc_common', 'health', 'status']
basedata = basedata[keptColumns]
In [53]:
# Keep only the records whose status is exactly "Dead".
isDead = basedata['status'] == "Dead"
dead = basedata[isDead]
In [54]:
# Split the dead-tree records into one frame per borough, using an exact
# match on the 'borough' column.
deadBorough = dead['borough']
manhattanDead = dead.loc[deadBorough.eq('Manhattan')]
bronxDead = dead.loc[deadBorough.eq('Bronx')]
queensDead = dead.loc[deadBorough.eq('Queens')]
brooklynDead = dead.loc[deadBorough.eq('Brooklyn')]
statenDead = dead.loc[deadBorough.eq('Staten Island')]
In [57]:
# Print the number of dead-tree records per borough, then the sum of those
# five counts as the total.
deadByBorough = [
    ('manhattan:  ', manhattanDead),
    ('bronx:  ', bronxDead),
    ('queens:  ', queensDead),
    ('brooklyn:  ', brooklynDead),
    ('staten:  ', statenDead),
]
deadTotal = 0
for label, boroughFrame in deadByBorough:
    boroughDeadCount = len(boroughFrame)
    deadTotal += boroughDeadCount
    print(label + str(boroughDeadCount))
print('total:  ' + str(deadTotal))
manhattan:  1802
bronx:  2530
queens:  4440
brooklyn:  3319
staten:  1870
total:  13961
In [58]:
# Shape of the dead-tree subset; its row count can be compared with the
# per-borough total printed in the preceding cell.
dead.shape
Out[58]:
(13961, 6)
In [27]:
# Drop every row that has a missing value in any of the retained columns.
# NOTE(review): `dead` is derived from `basedata` in an earlier cell, and this
# line rebinds `basedata` to the filtered frame. Re-running the `dead` cell
# after this one would therefore filter the already-cleaned data; presumably
# dead trees lack health/species values and would be removed here -- confirm.
basedata = basedata.dropna(axis=0, how='any')
In [28]:
# Shape of the frame after rows with missing values were dropped.
basedata.shape
Out[28]:
(652167, 6)
In [29]:
# Keep the first row encountered for each distinct 'spc_common' value.
# The other columns (tree_id, borough, health, status) in the result are
# simply those of that first row; they are not aggregated per species.
isRepeatSpecies = basedata.duplicated(subset='spc_common', keep='first')
treeSpecies = basedata[~isRepeatSpecies]
In [30]:
# Shape of the de-duplicated frame: one row per distinct 'spc_common' value.
treeSpecies.shape
Out[30]:
(132, 6)
In [31]:
# Preview the first rows of the per-species frame.
treeSpecies.head()
Out[31]:
tree_id borough spc_latin spc_common health status
0 180683 Queens Acer rubrum red maple Fair Alive
1 200540 Queens Quercus palustris pin oak Fair Alive
2 204026 Brooklyn Gleditsia triacanthos var. inermis honeylocust Good Alive
4 189565 Brooklyn Tilia americana American linden Good Alive
9 192755 Brooklyn Platanus x acerifolia London planetree Fair Alive
In [32]:
# Split the cleaned records into one frame per borough, using an exact
# match on the 'borough' column.
manhattan = basedata.query("borough == 'Manhattan'")
bronx = basedata.query("borough == 'Bronx'")
queens = basedata.query("borough == 'Queens'")
brooklyn = basedata.query("borough == 'Brooklyn'")
staten = basedata.query("borough == 'Staten Island'")
In [42]:
def _healthCounts(boroughTrees):
    """Return one representative row per 'health' value with its group size.

    Works on a copy, so the frame passed in is left untouched. A 'count'
    column is added holding, for every row, the number of 'tree_id' entries
    that share that row's 'health' value; the frame is then reduced to the
    first row seen for each 'health' value.

    The remaining columns (tree_id, borough, spc_latin, spc_common, status)
    in the result are those of that first row only -- they are not
    aggregated and should not be read as describing the whole group.

    Parameters
    ----------
    boroughTrees : pandas.DataFrame
        Frame with at least 'health' and 'tree_id' columns.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'health' value, original index labels kept,
        with the extra 'count' column.
    """
    counted = boroughTrees.copy()
    counted['count'] = counted.groupby('health')['tree_id'].transform('count')
    return counted.drop_duplicates('health')


# Same summary for every borough; previously this recipe was repeated inline
# five times.
manhattanCount = _healthCounts(manhattan)
bronxCount = _healthCounts(bronx)
brooklynCount = _healthCounts(brooklyn)
queensCount = _healthCounts(queens)
statenCount = _healthCounts(staten)
In [43]:
# Display the Manhattan summary: one row per 'health' value, with the size
# of that health group in the 'count' column.
manhattanCount
Out[43]:
tree_id borough spc_latin spc_common health status count
5 190422 Manhattan Gleditsia triacanthos var. inermis honeylocust Good Alive 47358
22 199760 Manhattan Quercus phellos willow oak Fair Alive 11460
328 198018 Manhattan Ginkgo biloba ginkgo Poor Alive 3609
In [44]:
# Write each per-borough health summary and the species table to CSV in the
# notebook's working directory (index column included, as by default).
csvOutputs = [
    ('manhattanHealth2015.csv', manhattanCount),
    ('bronxHealth2015.csv', bronxCount),
    ('brooklynHealth2015.csv', brooklynCount),
    ('queensHealth2015.csv', queensCount),
    ('statenHealth2015.csv', statenCount),
    ('allSpecies.csv', treeSpecies),
]
for csvName, outputFrame in csvOutputs:
    outputFrame.to_csv(csvName)
In [ ]: