# basic libraries
import numpy as np
import pandas as pd
# Load the 2015 tree census CSV from the working directory.
basedata = pd.read_csv('2015TreeCensus.csv')

# Inspection leftovers: as bare expressions these display nothing when the
# file is run as a script (they only echo in an interactive session).
basedata.shape
basedata.columns

# Keep only the columns the rest of the script reads, in this order.
keptColumns = ['tree_id', 'borough', 'spc_latin', 'spc_common', 'health', 'status']
basedata = basedata.loc[:, keptColumns]
# Trees whose status is exactly "Dead". This is taken before the later
# dropna() call, so rows with missing values are still included here.
isDead = basedata['status'].eq("Dead")
dead = basedata.loc[isDead]

# One subset of the dead trees per borough.
deadBorough = dead['borough']
manhattanDead = dead.loc[deadBorough == 'Manhattan']
bronxDead = dead.loc[deadBorough == 'Bronx']
queensDead = dead.loc[deadBorough == 'Queens']
brooklynDead = dead.loc[deadBorough == 'Brooklyn']
statenDead = dead.loc[deadBorough == 'Staten Island']

# Report the per-borough dead-tree counts, then their sum.
deadByBorough = [
    ('manhattan: ', manhattanDead),
    ('bronx: ', bronxDead),
    ('queens: ', queensDead),
    ('brooklyn: ', brooklynDead),
    ('staten: ', statenDead),
]
for label, subset in deadByBorough:
    print(label + str(len(subset)))
print('total: ' + str(sum(len(subset) for _, subset in deadByBorough)))

# Inspection leftover; displays nothing when run as a script.
dead.shape
# Discard every row that has a missing value in any of the kept columns.
basedata = basedata.dropna(axis=0, how='any')
# Inspection leftover; displays nothing when run as a script.
basedata.shape

# One row per common species name: keep the first occurrence of each name.
repeatedSpecies = basedata['spc_common'].duplicated(keep='first')
treeSpecies = basedata.loc[~repeatedSpecies]
# Inspection leftovers; display nothing when run as a script.
treeSpecies.shape
treeSpecies.head()
def _boroughRows(frame, boroughName):
    """Return the rows of ``frame`` whose 'borough' value equals ``boroughName``."""
    return frame.loc[frame['borough'] == boroughName]


def _healthCounts(boroughFrame):
    """Return one row per distinct 'health' value with a 'count' column.

    'count' is the number of non-null tree_id values that share the row's
    health value. Only the first row seen for each health value is kept,
    so the remaining columns (tree_id, species, ...) describe that single
    tree rather than the group as a whole.
    """
    perHealth = boroughFrame.groupby('health')['tree_id'].transform('count')
    # assign() works on a copy, so the borough subset itself is left untouched.
    return boroughFrame.assign(count=perHealth).drop_duplicates(subset='health')


# Per-borough subsets of the cleaned data.
manhattan = _boroughRows(basedata, 'Manhattan')
bronx = _boroughRows(basedata, 'Bronx')
queens = _boroughRows(basedata, 'Queens')
brooklyn = _boroughRows(basedata, 'Brooklyn')
staten = _boroughRows(basedata, 'Staten Island')

# Health-category tallies for each borough.
manhattanCount = _healthCounts(manhattan)
bronxCount = _healthCounts(bronx)
brooklynCount = _healthCounts(brooklyn)
queensCount = _healthCounts(queens)
statenCount = _healthCounts(staten)
# Inspection leftover; displays nothing when run as a script.
manhattanCount

# Write the per-borough health summaries and then the species table, in the
# same order as before. Each file goes to the current working directory.
csvOutputs = [
    (manhattanCount, 'manhattanHealth2015.csv'),
    (bronxCount, 'bronxHealth2015.csv'),
    (brooklynCount, 'brooklynHealth2015.csv'),
    (queensCount, 'queensHealth2015.csv'),
    (statenCount, 'statenHealth2015.csv'),
    (treeSpecies, 'allSpecies.csv'),
]
for outFrame, csvPath in csvOutputs:
    outFrame.to_csv(csvPath)