############################################################
# Helper functions used to organise the FDP datasets
# Marco Visser, Gamboa, Panama, February 2014
############################################################
#' Prepare Growth and Survival datasets
#'
#' \code{prepareFDPdata} - prepares raw data for analysis in two
#' posible formats for each individual for the BCI
#' plot data.
#'
#' @param FDPobjects A vector of fdp census objects stored in
#' the global environment
#' @param census Which census is included? The "tree" census (> 1cm dbh)
#' or the "seedling" census.
#' @param type Which format is desired? Can be either 'paired' or 'trajectory'
#' A paired format, is where each consecutive census is paired. This is the
#' standard growth analysis format. Or a trajectory format, where all data are
#' organized next to each other (this is for growth trajectory analysis).
#' @param SpFitList A vector of 6 char species codes, if null all species
#' are used.
#'
#' @author Marco D. Visser
#'
#' @export
prepareFDPdata <- function(FDPobjects = ls()[grep("bci.full",ls())][3:7],
census = "tree", type = 'paired',SpFitList=NULL) {
if(census=='seedling'){
if(type=='paired') {
prepdata <- lapply(FDPobjects, function(X) subset(get(X),
sp%in%SpFitList))
dbhhght <- lapply(prepdata,function(X) X[,c("sp","dbh","hght","status","date",
"id")])
censuspairs <- vector("list",length(dbhhght)-1)
for(i in 1:length(censuspairs)){
censuspairs[[i]] <- c((1:(length(FDPobjects)-1))[i],(2:length(FDPobjects))[i])
}
tempdatapairs <- lapply(censuspairs, function(X) {
cbind(dbhhght[[X[1]]],dbhhght[[X[2]]])
})
censustracker <- lapply(censuspairs, function(X) {
rep(paste(X,collapse=''), nrow(dbhhght[[X[1]]]))
})
finalgrowthlong <- do.call(rbind,tempdatapairs)
names(finalgrowthlong) <- c(paste(names(dbhhght[[1]]),1,sep=''),
paste(names(dbhhght[[1]]),2,sep=''))
finalgrowthlong$census <- as.factor(unlist(censustracker))
rm(tempdatapairs)
growclean<-finalgrowthlong
class(growclean) <- c(class(growclean),"fdpdata",type,census)
}
else {
## combine Joe and Liza's census data in one format
if(is.null(SpFitList)){SpFitList <- unique(get(FDPobjects[1])$sp)}
prepdata <- lapply(FDPobjects, function(X) subset(get(X),
sp%in%toupper(SpFitList)))
## subset data and only include what is needed
censusdata <- lapply(prepdata,function(X)
X[,c("sp","id","plot","hght","dbh","status","date")])
## extract data
dbhdata <- do.call(cbind,lapply(censusdata,function(X) X[,"dbh"]))
hghtdata <- do.call(cbind,lapply(censusdata,function(X) X[,"hght"]))
survivaldata <- do.call(cbind,lapply(censusdata,function(X)
as.character(X[,"status"])))
datedata <- do.call(cbind,lapply(censusdata,function(X) as.Date(X[,"date"])))
## rename columns
colnames(dbhdata) = paste0("dbh",1:dim(dbhdata)[2])
colnames(survivaldata) = paste0("survival",1:dim(survivaldata)[2])
colnames(hghtdata) = paste0("hght",1:dim(hghtdata)[2])
colnames(datedata) = paste0("date",1:dim(datedata)[2])
## get meta data
metadata <- censusdata[[1]][,c("id","sp")]
growclean <- cbind(metadata,dbhdata,hghtdata,survivaldata,datedata)
class(growclean) <- c(class(growclean),"fdpdata",type,census)
}
}
if(census=='tree'){
if(is.null(SpFitList)){SpFitList <- unique(get(FDPobjects[1])$sp)}
prepdata <- lapply(FDPobjects, function(X) subset(get(X),
sp%in%tolower(SpFitList)))
## subset data and only include what is needed
censusdata <- lapply(prepdata,function(X)
X[,c("sp","tag","dbh","DFstatus","date","codes")])
if(type=='paired') {
censuspairs <- vector("list",length(censusdata)-1)
for(i in 1:length(censuspairs)){
censuspairs[[i]] <- c((1:(length(FDPobjects)-1))[i],(2:length(FDPobjects))[i])
}
tempdatapairs <- lapply(censuspairs, function(X) {
cbind(censusdata[[X[1]]],censusdata[[X[2]]])
})
censustracker <- lapply(censuspairs, function(X) {
rep(paste(X,collapse=''), nrow(censusdata[[X[1]]]))
})
finalgrowthlong <- do.call(rbind,tempdatapairs)
names(finalgrowthlong) <- c(paste(names(censusdata[[1]]),1,sep=''),
paste(names(censusdata[[1]]),2,sep=''))
finalgrowthlong$census <- as.factor(unlist(censustracker))
rm(tempdatapairs)
growclean<-finalgrowthlong
class(growclean) <- c(class(growclean),"fdpdata",type,census)
} else {
## extract data
dbhdata <- do.call(cbind,lapply(censusdata,function(X) X[,"dbh"]))
survivaldata <- do.call(cbind,lapply(censusdata,function(X) X[,"DFstatus"]))
codedata <- do.call(cbind,lapply(censusdata,function(X) X[,"codes"]))
datedata <- do.call(cbind,lapply(censusdata,function(X) X[,"date"]))
## rename columns
colnames(dbhdata) = paste0("dbh",1:dim(dbhdata)[2])
colnames(survivaldata) = paste0("survival",1:dim(survivaldata)[2])
colnames(codedata) = paste0("code",1:dim(codedata)[2])
colnames(datedata) = paste0("date",1:dim(datedata)[2])
## get meta data
metadata <- censusdata[[1]][,c("tag","sp")]
growclean <- cbind(metadata,dbhdata,survivaldata,datedata,codedata)
class(growclean) <- c(class(growclean),"fdpdata",type,census)
}
}
return(growclean)
}
# Compile function for speed
prepareFDPdata<-cmpfun(prepareFDPdata)
#' Extract Relavent Data
#'
#' \code{extractData} - extracts growth, survival or a combined dataset from
#' from objects previously prepared by \code{prepareFDPdata}
#'
#' @param FDPdata An object return by \code{prepareFDPdata}
#' @param type Which format is desired? Can be either 'growth', 'survival',
#' 'resprouter' or a paired format "paired" - in which each growth
#' and survival information is paired.
#' The paired format must be created with \code{prepareFDPdata} using the option
#' type='trajectory'.
#' @param datestan A date standardizer, when datestan = 21 the first cenus
#' dates are set to +- 0. This is only relavent for trajectory modelling.
#'
#' @author Marco D. Visser
#'
#' @export
extractData <- function(FDPdata=NULL,type='growth', datestan=21) {
if(is.element("tree",class(FDPdata))){
if(type=='growth'&is.element("paired",class(FDPdata))) {
##remove individuals that died in a second census
finalgrowthlong <- subset(FDPdata,DFstatus2=="alive")
finalgrowthlong$years <- (finalgrowthlong$date2-
finalgrowthlong$date1)/365.25
finalgrowthlong$grw <- ((finalgrowthlong$dbh2-finalgrowthlong$dbh1)
/finalgrowthlong$years)
## remove broken and dying individuals (resprouts)
growclean<-subset(finalgrowthlong, !is.na(dbh1)&!is.na(dbh2))
return(growclean)
}
if(type=='survival'&is.element("paired",class(FDPdata))) {
survivallong <- subset(FDPdata,DFstatus1=='alive')
survivallong$surv <- survivallong$DFstatus2=='alive'
survivallong$T <- (survivallong$date2-survivallong$date1)/356.25
survclean <- subset(survivallong,!is.na(dbh1))
return(survclean)
}
if(type=='resprouter'&is.element("paired",class(FDPdata))) {
## Get reprouter transition
##remove individuals that died in a second census
finallong <- subset(FDPdata,DFstatus2!="dead"&DFstatus2!='missing'&
codes1!='R')
finallong$years <- (finalgrowthlong$date2-
finalgrowthlong$date1)/365.25
## id resprouters
finallong$R <- ifelse(finallong$codes2=='R')
}
if(is.element("trajectory",class(FDPdata))) {
##How many census points?
Ncen <- length(grep("dbh",names(FDPdata)))
##Kick out all trees dead on the first census if any
deadonarrival <- FDPdata[,paste0("survival",1)]=="dead"
FDPdata <- FDPdata[!deadonarrival,]
}
if(type=='resprouter'&is.element("trajectory",class(FDPdata))) {
## 1) Resprouter cohorts
## find resprouters based on code (as this is most reliable
codesub<-FDPdata[,paste0("code",1:Ncen)]
## remove individuals that were already resprouting in the
## first census
cen1resprout <- codesub[,1]=='R'
cen1resprout[is.na(cen1resprout)] <- FALSE
resprouters <- FDPdata[!cen1resprout,]
## limit data to the rest of the resprouters
Rid <- apply(resprouters[,paste0("code",1:Ncen)],1
,function(x) is.element("R",x))
resprouters <- resprouters[Rid,]
## Id start and end of resprouter status
codesub<-resprouters[,paste0("code",1:Ncen)]
firstresprout <- suppressWarnings(sapply(1:dim(codesub)[1],function(X)
min(which(codesub[X,]=='R'))))
resprouters$fr <- firstresprout
## finalize
return(resprouters)
}
if(is.element(type,c('survival','growth','paired'))&
is.element("trajectory",class(FDPdata))) {
## find and drop all reprouters, these are treated differently
## once an individual respouts,it is always considered a reprouter
## and it is treated in a seperate analysis
resprouter <- FDPdata[,paste0("survival",1:Ncen)]=="lost_stem"
## when did it first respout?
firstresprout <- suppressWarnings(sapply(1:dim(resprouter)[1],function(X)
min(which(resprouter[X,],arr.ind=T))))
firstresprout[!is.finite(firstresprout)]<-NA
## Find missing individuals that are actually resprouters
missing <- FDPdata[,paste0("survival",1:Ncen)]=="missing"
## run through all respouters and fix
correction <-resprouter[!is.na(firstresprout),]
pb <- txtProgressBar(min = 0, max = dim(correction)[1], style = 3)
for(i in 1:dim(correction)[1]){
correction[i,][na.omit(firstresprout)[i]:Ncen]<-TRUE
setTxtProgressBar(pb, i)
}
close(pb)
## id missing
resprouter[!is.na(firstresprout),]<-correction
# remove respouter information from data
FDPdata[,paste0("dbh",1:Ncen)][resprouter] <- NA
## which data points have information in them?
infopoints <- !is.na(FDPdata[,paste0('dbh',1:Ncen)])
if(type=='growth'){
## dicard those with only one or less points
growdatpoints <- apply(infopoints,1,sum)
growclean <- subset(FDPdata,growdatpoints>1)
} else {
## dicard those no points of meassurement (eg. missing)
growdatpoints <- apply(infopoints,1,sum)
growclean <- subset(FDPdata,growdatpoints>=1)
}
## update infopoints
infopoints <- !is.na(growclean[,paste0('dbh',1:Ncen)])
##id the first and last census with information
growclean$start <- sapply(1:dim(infopoints)[1],function(X)
min(which(infopoints[X,],arr.ind=T)))
growclean$end <- sapply(1:dim(infopoints)[1],function(X)
max(which(infopoints[X,],arr.ind=T)))
## stardardize dates
dates <- growclean[,paste0('date',1:Ncen)]/365.25
growclean[,paste0('date',1:Ncen)]<-dates
growclean$startdate <- sapply(1:nrow(growclean),
function(X) dates[X,growclean$start[X]])
## remove those with missing dates, as dates cannot be sampled in the MCMC
## these are a handful of missing individuals
nainc=apply(is.na(growclean[,paste0('date',1:5)]),1,sum)>1
growclean <- growclean[!nainc,]
if(is.element(type,c('paired','survival'))){
## transform survival columns into binary data
survdata <- growclean[,paste0('survival',1:Ncen)]
survdata <- ifelse(survdata!='dead',1,0)
## shift survival to correspond to DBH t-1
survdata[,1:(Ncen-1)] <- survdata[,2:Ncen]
## no information on survival beyond the census
survdata[,Ncen] <- NA
## finalize
survdata -> growclean[,paste0('survival',1:Ncen)]
if(type=='survival'){
## Make time since previous census
survdates <- growclean[,paste0('date',1:Ncen)]
## Shift dates to correspond to t-1 and subtract from date t
survdates[,1:Ncen] <- (survdates[,c(2:Ncen,5)] - survdates[,1:Ncen])
## no information on survival beyond the census
survdates[,Ncen] <- 0
## repair names
colnames(survdates) <- paste0('date',1:Ncen)
## finalize
survdates -> growclean[,paste0('date',1:Ncen)]
return(growclean)}
if(type=='paired'){
## Make time since previous census
survdates <- growclean[,paste0('date',1:Ncen)]
## Shift dates to correspond to t-1 and subtract from date t
survdates[,1:Ncen] <- (survdates[,c(2:Ncen,5)] - survdates[,1:Ncen])
## no information on survival beyond the census
print(1)
survdates[,Ncen] <- 0
## rename to T
colnames(survdates) <- paste("T",1:Ncen,sep='')
print(1)
## finalize
pairdata <- cbind(growclean,survdates)
return(pairdata)}
} else {return(growclean)}
}
if(type=='paired'&!is.element("trajectory",class(FDPdata))) {
stop("paired option only valid on trajectory data")
}
}
else{
if(!is.element("seedling",class(FDPdata))){
stop('expected FDPdata of class seedling')}
finalseedlings <- extractDataSeedling(FDPdata=FDPdata,type=type)
return(finalseedlings)
}
}
#' Extract Relavent Data from seedling database
#'
#' \code{extractDataSeeding} - extracts growth, survival or a combined dataset
#' from seedling data previously prepared by \code{prepareFDPdata}
#'
#' @param FDPdata An object return by \code{prepareFDPdata}
#' @param type Which format is desired? Can be either 'growth', 'survival',
#' 'resprouter' or a paired format "paired" - in which each growth
#' and survival information is
#' The paired format must be created with \code{prepareFDPdata} using the option
#' type='trajectory'.
#' @param datestan A date standardizer, when datestan = 21 the first cenus
#' dates are set to +- 0. This is only relavent for trajectory modelling.
#'
#' @author Marco D. Visser
#'
#'@export
extractDataSeedling <- function(FDPdata=NULL,type='growth') {
if(is.element(type,c('growth','paired'))&
is.element("paired",class(FDPdata))) {
FDPdata <- subset(FDPdata,hght1>0&hght2>0)
FDPdata$int <- (FDPdata$date2-FDPdata$date1)/365.25
FDPdata$grw <- (FDPdata$hght2-FDPdata$hght1)/as.numeric(FDPdata$int)
return(FDPdata)
}
if(type=='survival'&is.element("paired",class(FDPdata))) {
survivallong <- subset(FDPdata,status1=='A')
survivallong$surv <- survivallong$status2=='A'
survivallong$T <- as.numeric((survivallong$date2-survivallong$date1))/356.25
survclean <- subset(survivallong,!is.na(hght1))
return(survclean)
}
if(is.element("trajectory",class(FDPdata))) {
Ncen <- length(grep("hght",names(FDPdata)))
## make all -2 and -9 into NA
heightdata <- FDPdata[,paste0('hght',1:Ncen)]
makeNApoints <- (is.na(heightdata)|heightdata<=0)
heightdata[makeNApoints] <- NA
## overwrite original
heightdata -> FDPdata[,paste0('hght',1:Ncen)]
## which data points have information in them?
heightdata <- FDPdata[,paste0('hght',1:Ncen)]
infopoints <- !(is.na(heightdata))
if(type=='growth'){
## dicard those with only one or less points
growdatpoints <- apply(infopoints,1,sum)
growclean <- subset(FDPdata,growdatpoints>1)
} else {
## dicard those no points of meassurement (eg. missing)
growdatpoints <- apply(infopoints,1,sum)
growclean <- subset(FDPdata,growdatpoints>=1)
}
## update infopoints
heightdata <- growclean[,paste0('hght',1:Ncen)]
infopoints <- !(is.na(heightdata))
##id the first and last census with information
growclean$start <- sapply(1:dim(infopoints)[1],function(X)
min(which(infopoints[X,],arr.ind=T)))
growclean$end <- sapply(1:dim(infopoints)[1],function(X)
max(which(infopoints[X,],arr.ind=T)))
## stardardize dates
dates <- growclean[,paste0('date',1:Ncen)]/365.25
## replace those with missing dates, as dates cannot be sampled in the MCMC
## replace with average censusdate
averagedates <- colMeans(dates,na.rm=T)
dates <- sapply(1:dim(dates)[2],function(X)
ifelse(!is.na(dates[,X]),dates[,X],averagedates[X]))
growclean[,paste0('date',1:Ncen)]<-dates
growclean$startdate <- sapply(1:nrow(growclean),
function(X) dates[X,growclean$start[X]])
if(is.element(type,c('paired','survival'))){
## transform survival columns into binary data
survdata <- growclean[,paste0('survival',1:Ncen)]
survdata <- ifelse(survdata=='A',1,0)
## shift survival to correspond to DBH t-1
survdata[,1:(Ncen-1)] <- survdata[,2:Ncen]
## no information on survival beyond the census
survdata[,Ncen] <- NA
## finalize
survdata -> growclean[,paste0('survival',1:Ncen)]
if(type=='survival'){
## Make time since previous census
survdates <- growclean[,paste0('date',1:Ncen)]
## Shift dates to correspond to t-1 and subtract from date t
survdates[,1:Ncen] <- (survdates[,c(2:Ncen,5)] - survdates[,1:Ncen])
## no information on survival beyond the census
survdates[,Ncen] <- 0
## repair names
colnames(survdates) <- paste0('date',1:Ncen)
## finalize
survdates -> growclean[,paste0('date',1:Ncen)]
return(growclean)}
if(type=='paired'){
## Make time since previous census
survdates <- growclean[,paste0('date',1:Ncen)]
## Shift dates to correspond to t-1 and subtract from date t
survdates[,1:Ncen] <- (survdates[,c(2:Ncen,5)] - survdates[,1:Ncen])
## no information on survival beyond the census
print(1)
survdates[,Ncen] <- 0
## rename to T
colnames(survdates) <- paste("T",1:Ncen,sep='')
print(1)
## finalize
pairdata <- cbind(growclean,survdates)
return(pairdata)}
} else {return(growclean)}
}
}
extractData<-cmpfun(extractData)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.