# basic libraries
import numpy as np
import pandas as pd
# Load the raw census export. The two bare expressions below are evaluated
# and their values discarded when this file is run as a plain script.
basedata = pd.read_csv('2015TreeCensus.csv')
basedata.head()
basedata.shape

# Keep only the id, borough, species, status and coordinate columns, then
# discard every row that has a missing value in any of the kept columns.
_kept_columns = [
    'tree_id',
    'borough',
    'spc_latin',
    'spc_common',
    'status',
    'latitude',
    'longitude',
]
basedata = basedata.loc[:, _kept_columns].dropna(axis=0, how='any')
basedata.shape
# Split the cleaned census into one frame per borough, matching on the exact
# borough label stored in the 'borough' column.
_borough = basedata['borough']
manhattan = basedata.loc[_borough.eq('Manhattan')]
bronx = basedata.loc[_borough.eq('Bronx')]
queens = basedata.loc[_borough.eq('Queens')]
brooklyn = basedata.loc[_borough.eq('Brooklyn')]
staten = basedata.loc[_borough.eq('Staten Island')]

# Report the row count of each borough frame, followed by their sum.
_borough_sizes = [
    ('manhattan', len(manhattan)),
    ('bronx', len(bronx)),
    ('queens', len(queens)),
    ('brooklyn', len(brooklyn)),
    ('staten', len(staten)),
]
for _label, _size in _borough_sizes:
    print('{}: {}'.format(_label, _size))
print('total: {}'.format(sum(size for _name, size in _borough_sizes)))
def _species_counts(trees):
    """Return one row per 'spc_common' value with a per-species 'count' column.

    'count' is the number of non-null 'tree_id' values among the rows that
    share the row's 'spc_common'. Only the first row seen for each species is
    kept, so the other columns carry that first row's values. The input frame
    is not modified.
    """
    per_species = trees.groupby('spc_common')['tree_id'].transform('count')
    return trees.assign(count=per_species).drop_duplicates(subset='spc_common')


# Build the species-count table for each borough frame.
manhattanCount = _species_counts(manhattan)
manhattanCount.head()  # bare expression; value is discarded in a script run
bronxCount = _species_counts(bronx)
brooklynCount = _species_counts(brooklyn)
queensCount = _species_counts(queens)
statenCount = _species_counts(staten)
statenCount.shape  # bare expression; value is discarded in a script run
# Write each borough frame and its species-count table to CSV in the working
# directory. to_csv is called with its defaults, so the DataFrame index is
# written as the first column of every file.
_exports = [
    ('manhattan2015.csv', manhattan),
    ('manhattanCount2015.csv', manhattanCount),
    ('bronx2015.csv', bronx),
    ('bronxCount2015.csv', bronxCount),
    ('brooklyn2015.csv', brooklyn),
    ('brooklynCount2015.csv', brooklynCount),
    ('queens2015.csv', queens),
    ('queensCount2015.csv', queensCount),
    ('staten2015.csv', staten),
    ('statenCount2015.csv', statenCount),
]
for _filename, _frame in _exports:
    _frame.to_csv(_filename)