R/02_summarizing_data_copy.R

# ### predicting the fine detritus from the rest of the data####################
#
# ## should be no copying over
# where_values <- function(x) which(!is.na(x))
#
#
# fine_cardoso2008<- function(coarse){
#   exp(0.68961 * log(coarse) - 0.11363)
# }
#
# detritus_wider <- detritus_wider %>%
#   mutate(detritus0_150 = ifelse(visit_id == 21, fine_cardoso2008(detritus150_20000), detritus0_150))
#
#
# ## for saba datasetid=111 visits 451 121 126 116 detritus150_20000 detritus20000_NA
# fine_saba2009 <- function(med){
#   exp(0.79031 * log(med) - 0.070033)
# }
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus0_1500 = ifelse(dataset_id == 111, fine_saba2009(detritus1500_20000), NA))

## Although Puerto Rico 2010 dataset=116 based only on relaxed diameter the adj r sq is 0.79
#eqn from all 1990s El verde plants (n=189, rsq = 0.78) interestingly v. similar eqn from pitilla 2002 secondary

# elverde90s <- detritus_wider %>%
#   filter(dataset_id==131| dataset_id==126|dataset_id==121|dataset_id==221)
#
# elverde90s$detritus0_NA <- with(elverde90s, detritus10_1500 + detritus1500_20000 + detritus20000_NA)
# summary(glm(log(detritus0_NA)~log(diameter), data=elverde90s))
# plot(log(elverde90s$detritus0_NA)~log(elverde90s$diameter))
#
# total_elverde2010 <- function(dia){
#   exp(-6.223+ 2.179* log(dia))
# }
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus0_NA = ifelse(dataset_id == 116, total_elverde2010(diameter), NA))

#Dominica dataset=136, visit = 191, 196, 201 is just fine

#Macae detritus could not be estimated with rsq > 0.5

# #argentina Las gamas datasets 166, 171, 181
#
# fine_lasgamas<- function(med){
#   ((0.9857 *med) + 1.496)
# }
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus0_150 = ifelse(dataset_id%in%c(166,171,181), fine_lasgamas(detritus150_850), detritus0_150))

#French Guiana only 186 (petit saut 2007) has fpom in ml, 211 (sinnamary 2011) has fpom cpom and deadleaves

# #this allometric equation is from data held offline by regis
# fpom_convert_ml_into_g<-function(FPOMml){(0.0013*(FPOMml)^2+0.0243*(FPOMml)+0.0369)}
# fpom_convert_ml_into_g(7.6)

#now french guiana Sinnamary (dataset 211) generate detritus from detritus allometric equations and fns
#NOTE there is an input error: dead_leaves in Sinnamary was detritus>2cm x 2cm roughly, but in other sites #brown bromeliad leaves!

# sinn<-detritus_wider%>%filter(dataset_id==211)
#
# summary(glm(log(cpom_g)~log(fpom_g), data=sinn))#sinnamary based eqn has rsq of 0.64
#
# plot(log(sinn$cpom_g)~log(sinn$fpom_g))

# summary(glm(log(dead_leaves)~log(fpom_g), data=sinn))#sinnamary based eqn has rsq of 0.36
#
# plot((sinn$dead_leaves)~(sinn$fpom_g))
# sinn$detover150<-sinn$fpom_g+sinn$cpom_g+sinn$dead_leaves
# summary(glm(log(detover150)~log(fpom_g), data=sinn))#sinnamary based eqn has rsq of 0.59
# plot(log(sinn$detover150)~log(sinn$fpom_g))

# over150_frenchguiana<- function(a){
#   exp(0.688*(a)+3.075)
# }

# cpom_frenchguiana<- function(FPOMg){
#   exp(0.858*log(FPOMg)+1.872)
# }

# largedet_frenchguiana<- function(FPOMg){
#   exp(0.582*log(FPOMg)+2.5545)
# }


#next apply sinnamary-based allometric eqns to dataset 186 (petit saut 2007) where we do the rather dangerous thing
#of converting fpom in ml to fpom in grams to predict detrital grams in another 2 categories
#we do the same thing for petit saut 2014(dataset 216), but here we at least start with fpom in mg

# detritus_wider<-detritus_wider %>%
#   mutate(detritus0_150 = ifelse(dataset_id==186, fpom_convert_ml_into_g(fpom_ml), detritus0_150))%>%
#   mutate(detritus150_20000 = ifelse(dataset_id==186, cpom_frenchguiana(detritus0_150), detritus150_20000))%>%
#   mutate(detritus20000_NA= ifelse(dataset_id==186, largedet_frenchguiana(detritus0_150), detritus20000_NA))
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus150_20000 = ifelse(dataset_id==216, cpom_frenchguiana(detritus0_150), detritus150_20000))%>%
#   mutate(detritus20000_NA= ifelse(dataset_id==216, largedet_frenchguiana(detritus0_150), detritus20000_NA))

# #next, in dataset 201 need to convert fpom_ml to g then make it into detritus0_150,
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus0_150 = ifelse(dataset_id==201,fpom_convert_ml_into_g(fpom_ml), detritus0_150))
# detritus_wider<-detritus_wider%>%
#  mutate(detritus150_NA = ifelse(dataset_id==201, over150_frenchguiana(detritus0_150), detritus150_NA))

#DIANE TO DO: chek after dataset updated: detritus_wider%>%filter(dataset_id%in%c(201, 206, 211, 216, 186))%>%View

#pitilla costa rica 200 all present
#pitilla costa rica 2002, 2010 are dataset61, 71; pitilla1997 dataset id is 51

# fine_pitilla<- function(med, coarse){
#   exp(0.79031 * log(med+coarse) - 0.07033)
# }#R2= 0.8965
# deadleaves_pitilla<- function(med,coarse){
#   exp(1.01680 * log(med+coarse) - 1.09992)
# }#R2= 0.776
#
# detritus_wider<-detritus_wider%>%
#   mutate(detritus0_150 = ifelse(dataset_id%in%c(51,61), fine_pitilla(detritus150_850, detritus850_20000), detritus0_150))
# detritus_wider<-detritus_wider %>%
#   mutate(detritus20000_NA = ifelse(dataset_id==51, deadleaves_pitilla(detritus150_850, detritus850_20000), detritus20000_NA))
#
#
# detritus_wider<-detritus_wider %>%
#   mutate(detritus20000_NA = ifelse(dataset_id==71, deadleaves_pitilla(0,detritus150_20000), detritus20000_NA))%>%
#   mutate(detritus0_150 = ifelse(dataset_id==71, fine_pitilla(0,detritus150_20000), detritus0_150))
#

#pitilla 2004 dissection visit 66

# ## creating a dataset for generating a model.
# pitilla2000s<-detritus_wider%>%filter(dataset_id==56)
# pitilla2000s<-pitilla2000s%>%mutate(detritus0_NA = ifelse(visit_id==51,
#                                                           pitilla2000s$detritus0_150+pitilla2000s$detritus150_850+
#                                                             pitilla2000s$detritus20000_NA+pitilla2000s$detritus850_20000,
#                                                           pitilla2000s$detritus0_150+pitilla2000s$detritus150_850+
#                                                             pitilla2000s$detritus20000_NA+pitilla2000s$detritus1500_20000+
#                                                             pitilla2000s$detritus850_1500))
# summary(glm((detritus0_NA)~(diameter), family=gaussian, data=pitilla2000s)) #rsq=0.78
# plot((pitilla2000s$detritus0_NA)~(pitilla2000s$diameter))
#
# totaldet_pitilla<- function(dia){
#   (0.7798 * dia - 24.147)
# }
# detritus_wider<-detritus_wider %>%
# #   mutate(detritus0_NA = ifelse(dataset_id == 66, totaldet_pitilla(diameter), detritus0_NA))
#
# #Columbia Sisga Guasca datasets 76, 81, and Rio Blanco 2014 (dataset 91) base on pitilla
# pitilla2000s$detritus150_NA<-pitilla2000s$detritus0_NA-pitilla2000s$detritus0_150
# summary(glm((detritus0_150)~(detritus150_NA), family=gaussian, data=pitilla2000s))
# plot((pitilla2000s$detritus0_150)~(pitilla2000s$detritus150_NA))
#
# finealso_pitilla<- function(most){
# #   (0.407 *most - 0.36633)} #rsq=0.95
#
# detritus_wider<-detritus_wider%>%
#   mutate(detritus0_150 = ifelse(dataset_id%in%c(76,81, 91), finealso_pitilla(detritus150_NA), detritus0_150))

#Colombia RioBlanco2012 dataset86 base it on the 2014 rio blanco data
# rioblanco<-detritus_wider%>%filter(dataset_id==91)
# summary(glm((detritus150_NA)~(num_leaf), family=gaussian, data=rioblanco))
# plot((rioblanco$detritus150_NA)~(rioblanco$num_leaf))
# #the best model here had an rsquared of 0.52 so we decided to exclude the rio blanco 2012 dataset (86)

# #honduras dataset 101 106 has detritus 22- 10000, we could estimate 20000 and greater and ignore the amount missed?
# pitilla2000s$detritus0_20000<-pitilla2000s$detritus0_NA-pitilla2000s$detritus20000_NA
# summary(glm((detritus20000_NA)~log(detritus0_20000), family=poisson, data=pitilla2000s))#rsq=0.67
# plot((pitilla2000s$detritus20000_NA)~(pitilla2000s$detritus0_20000))
#
# deadleavesalso_pitilla<- function(almost){
#   exp(0.694 *log(almost) - 0.468)
# }
#
# detritus_wider<-detritus_wider%>%
#   mutate(detritus10000_NA = ifelse(dataset_id%in%c(101,106), deadleavesalso_pitilla(detritus22_10000), NA))


# ### Cardoso 2011
# vis_46 <- detritus_wider %>%
#   filter(visit_id==46)
#
# cardoso2011<-detritus_wider %>%
#   filter(visit_id==231)
#
# plot(log(vis_46$detritus0_150),log(vis_46$detritus150_850+vis_46$detritus850_1500))
#
# # plot(log(cardoso2011$detritus0_150),log(cardoso2011$detritus150_NA))
#
# summary(lm(log(detritus0_150)~log(vis_46$detritus150_850+vis_46$detritus850_1500),data=vis_46 ))
#
# ### assumption, we considered 150_850 + 850_1500 equivalent to 150_NA; sample size = 11
#
# fine_cardoso2011<- function(coarse){
#   exp(0.66648 * log(coarse) + 0.80186)
# }
#
# detritus_wider <- detritus_wider %>%
#   mutate(detritus0_150 = ifelse(visit_id == 231, fine_cardoso2011(detritus150_NA), detritus0_150))

### Picinguaba2011 - detritus125_NA is actually total detritus0_NA. It must be corrected in the database.

# P2011 <- detritus_wider %>%
#   filter(visit_id==241)
#
# correctedP2011<-P2011$detritus125_NA
# correct125_NA<-rep(NA,20)
# length(correctedP2011)
# detritus_wider <- detritus_wider %>%
#   mutate(detritus0_NA = ifelse(visit_id == 241, correctedP2011, detritus0_NA)) %>%
#   mutate(detritus125_NA = ifelse(visit_id == 241, correct125_NA, detritus125_NA))


### Jureia2013 - detritus125_800 is total detritus0_NA. It must be corrected in the database.
#
# J2013 <- detritus_wider %>%
#   filter(visit_id==246)
#
# correctedJ2013<-J2013$detritus125_800
# correct125_800<-rep(NA,20)
#
# detritus_wider <- detritus_wider %>%
#   mutate(detritus0_NA = ifelse(visit_id == 246, correctedJ2013, detritus0_NA)) %>%
#   mutate(detritus125_800 = ifelse(visit_id == 246, correct125_800, detritus125_800))
#

#### Serra do Japi
### we have detritus125_NA, need detritus0_125

# # Japi2013 <- detritus_wider %>%
# #   filter(visit_id==251|visit_id==346)
# vis_46 <- detritus_wider %>%
#   filter(visit_id==46)
#
# plot(log(vis_46$detritus0_150),log(vis_46$detritus150_850+vis_46$detritus850_1500+vis_46$detritus1500_20000+vis_46$detritus20000_NA))
# summary(lm(log(detritus0_150)~log(vis_46$detritus150_850+vis_46$detritus850_1500+vis_46$detritus1500_20000+vis_46$detritus20000_NA),data=vis_46 ))
#
# fine_SJ2013<- function(coarse){
#   exp(0.70297 * log(coarse) -0.13327)
# }
#
# ### assumption, we considered 150_850 + 850_1500 + 1550-20000 + 20000_NA equivalent to 125_NA; sample size = 11
#
# detritus_wider <- detritus_wider %>%
#   mutate(detritus0_150 = ifelse(visit_id == 251|visit_id==346, fine_SJ2013(detritus125_NA), detritus0_150))

# x<-c(2,3,4,5,NA,7)
# y<-c(2,3,4,5,6,7)
# z<-c(NA, NA, NA)
#
# na.checker<-function(a){
#   ifelse((mean(a, na.rm=TRUE)*length(a)-sum(a, na.rm=TRUE))>0,
#          1,
#          ifelse(mean(a,na.rm=TRUE)>0,
#                 2,
#                 NA_real_))
# }
#
# ### Function identified missing data that was not necessarily correct.
#
# na.checker(x)
# na.checker(y)
# na.checker(z)
#
# visitdatanames<- visits %>%
#   select(visit_id, dataset_name)
#
# detrital_tableNA <- detritus_wider %>%
#   select(-bromeliad_id, -dataset_name) %>%  #-bromeliad_id, -dataset_name
#   group_by(visit_id) %>%
#   #summarize(check = na.checker(detritus0_NA))%>%
#   summarise_each(funs(nas = "na.checker"(.)))%>%
#   left_join(visitdatanames)


#write.csv(detrital_tableNA, "data-intermediate/detrital_table_NA.csv")
# visualize with a daff ---------------------------------------------------


daff::render_diff(daff::diff_data(detritus_original, detritus_wider))

# write data out ----------------------------------------------------------

write_csv(detritus_wider, "data-raw/02_broms.csv")
SrivastavaLab/cesabfunctionalwebsdata documentation built on May 9, 2019, 1:55 p.m.