#wrap function for dshm_split_transects
dshm_split_transect <- function(transect.data, inter.dist, lwr, search.time, w, mute = TRUE, cap){
x <- floor(sp::SpatialLinesLengths(transect.data)/lwr) #maximum number of segements given the length of the given segment
#if the transect is less than 2x minimum segment length, it will not be splitted
if(x<=1){
#conversion from SpatialLinesDataFrame to SpatialLines
seg <- raster::spLines(sp::coordinates(transect.data)[[1]][[1]], crs = raster::crs(transect.data))
#re-conversion to SpatialLinesDataFrame adding original transect label and sample (or segment) label
seg <- sp::SpatialLinesDataFrame(seg, data.frame(Transect.Label = transect.data$ID, Sample.Label = 1))
if(cap){
seg.buf <- rgeos::gBuffer(seg, width = w, capStyle = "ROUND", byid = TRUE) #adding a buffer
} else {
seg.buf <- rgeos::gBuffer(seg, width = w, capStyle = "FLAT", byid = TRUE)
}
seg.buf$length <- sp::SpatialLinesLengths(seg)/10^3 #adding length information in km
seg.buf$area <- raster::area(seg.buf)/10^6 #adding area information in km^2
} else { #if the transect is more than 2x minimum segment length, it will be splitted
repeat{
t1 <- proc.time() #starting recording time
x <- x-1 #number of cutting points (this is always one order smaller than the number of segments)
repeat{ #starting routine that splits the transect
#sampling cutting points on the transect in a 'stratified random approach' (note that sometimes it might happen than one point lays out of the line, this will get an error)
pts <- sp::spsample(transect.data, x, type = "stratified")
shape_conv <- sf::st_as_sfc(transect.data) #conversion from SpatialLinesDataFrame to 'sfc' object
pts_conv <- sf::st_as_sfc(pts) #conversion from SpatialPoints to 'sfc' object
pts_buff <- sf::st_buffer(pts_conv, dist = inter.dist) #adding a buffer to the points
diff <- sf::st_difference(shape_conv, sf::st_union(sf::st_combine(pts_buff))) #difference between lines and points gives segments
diff <- sf::st_cast(diff, "LINESTRING") #conversion from 'MULTILINESTRING' to 'LINESTRING'
coords <- list() #empty vector to store coordinates
sls <- list() #empty vector to store segments
sls.buf <- list() #empty vector to store buffered segments
d <- c() #empty vector to store segment distances
for(i in 1:length(diff)){ #for each segment in 'diff' object
xy <- sf::st_coordinates(diff[i])[,1:2] #get coordinates
coords[[i]] <- xy #store coordinates
#conversion to SpatialLinesDataFrame
sls[[i]] <- sp::SpatialLinesDataFrame(raster::spLines(xy, crs = raster::crs(transect.data)),
data = data.frame(Transect.Label = transect.data$ID, Sample.Label = i))
d[i] <- sp::SpatialLinesLengths(sls[[i]]) #calculate and store distance
sls.buf[[i]] <- rgeos::gBuffer(sls[[i]], width = w, capStyle = "FLAT", byid = TRUE) #add buffer with 'flat' end
sls.buf[[i]]$length <- sp::SpatialLinesLengths(sls[[i]])/10^3 #add info on length in m^2
sls.buf[[i]]$area <- raster::area(sls.buf[[i]])/10^6 #add info on area in km^2
}
if (!mute) { #if true=FALSE, then it prints segment distances at every iteration
print(d)
}
t2<-(proc.time() - t1) #time after each iteration
if(sum(d>lwr) == length(d)||t2 > search.time){ #if the length of all segments meets the input limit OR if the searching time is greater than the input limit, the routine exits and goest to x-1
break
}
}
if(x==1){ #if the number of cutting points reaches 1, next iteration starts for the maximum again
x <- floor(sp::SpatialLinesLengths(transect.data)/lwr)
}
if(sum(d > lwr)==length(d)){ #if the length of all segments meets the input limit, the routine stops
break
}
}
seg.buf <- do.call(raster::bind, sls.buf) #all segments is list 'sls.buf' are bound in one single object
if(cap){
whole <- rgeos::gBuffer(transect.data, width = w - 1, capStyle = "ROUND", byid = TRUE) #buffered transect. Note the -2 due to imperfection in difference calculation (below)
whole_seg <- rgeos::gDifference(whole, seg.buf, byid = TRUE, id = as.character(seg.buf$Transect.Label)) #Difference between whole transect and the segments. This is used to extract the capped ends!
if(length(seg.buf)==2){ #if the segments are only 2, capped ends are used and bound together to get the final segments
whole_seg <- sp::disaggregate(whole_seg) #separating each component of the difference between transect and segments
whole_seg$area <- raster::area(whole_seg) #calculating the area
caps <- whole_seg[whole_seg$area > lwr*w*2,] #selecting those that have an area > lwr*w*2
} else if(length(seg.buf)==3){ #if the segments are 3, capped ends are used and bound with the middle segment
caps <- sp::disaggregate(whole_seg[2,]) #capped ends are equivalent of taking out the middle segment
} else { #if the segments are > 3
upr.capped <- sp::disaggregate(whole_seg[2,]) #one capped end
lwr.capped <- sp::disaggregate(whole_seg[length(whole_seg)-1,]) #opposite capped end
upr.capped$area <- raster::area(upr.capped) #calculating the area
lwr.capped$area <- raster::area(lwr.capped)
upr.capped <- upr.capped[upr.capped$area==min(upr.capped$area),] #extracting the segment with the smaller area
lwr.capped <- lwr.capped[lwr.capped$area==min(lwr.capped$area),]
caps <- raster::bind(upr.capped, lwr.capped) #binding capped segments in one object
}
caps$Transect.Label <- rep(transect.data$ID,2) #adding transect label information to caps
caps$Sample.Label <- c(1,length(seg.buf)) #adding segment label (i.e. 1 or max length of segment, order does not matter)
caps$length <- seg.buf[c(1,length(seg.buf)),]$length #adding length information
caps$area <- raster::area(caps)/10^6 #recalculating area in km^2
if(length(seg.buf)==2){ #if the segments are 2, caps are bound together
seg.buf <- caps
} else { #if segments are > 2 caps are bound with middle segment/s
seg.buf <- raster::bind(seg.buf[-c(1,length(seg.buf)),],caps)
}
}
}
return(seg.buf) #splitted transect with capped ends is returned
}
#function that prepares data for dshm_fit
#' Prepares data for Hurdle model fitting
#'
#' @param det.fn Detection function fitted by ds.
#' @param obsdata Dataframe object containing 4 columns: (1) 'Sample.Label' (i.e. label for segments), (2) 'size' for cluster size, (3) 'distance' (in km) for perpendicular distance of sighting from the transect line, and (4) 'Effort' for segment length (in km).
#' @param segdata Dataframe object with at least 3 columns: (1) 'Transect.Label' (i.e. label for transects), (2) 'Sample.Label' (i.e. label for segments), and (3) 'Effort' for segment length (in km). It may also contain additional columns with relevant habitat covariates specific to each segment that will be fed into the spatial model.
#' @param group If TRUE group abundance is estimated (i.e. 'size' = 1).
#' @param strip.width Transect strip width to calculate segment area if area is not provided.
#' @return Input dataframe 'segdata' with an additional column for animal abundance ('abund') corrected by the detection probability estimated by ds. If they are not included, a column for presence-absence ('pa') is added together with one for segment area (Effort*w, i.e. the strip width estimated by ds).
#' @author Filippo Franchini \email{filippo.franchini@@outlook.com}
dshm_prep <- function(det.fn, obsdata, segdata, group=FALSE, strip.width=NULL) {
if(is.null(det.fn)){
obsdata$p<-1
w<-strip.width
} else {
obsdata$p <- stats::predict(det.fn$ddf, newdata = obsdata)$fitted #predicting p
w <- summary(det.fn)$ddf$meta.data$width #extracting strip width from ddf object
}
if (group) {
obsdata$size<-1
}
#obsdata$size.c <- obsdata$size/obsdata$p #correcting size with p
obsdata <- dplyr::group_by(obsdata, Sample.Label) #grouping by segment
obsdata <- dplyr::summarize(obsdata, size.c = sum(size, na.rm = TRUE),phat=mean(p,na.rm=TRUE)) #summarizing calculating the sum of individuals for each segment
obsdata <- as.data.frame(obsdata) #conversion to dataframe
#obsdata$size.c <- round(obsdata$size.c)
segdata$abund <- rep(0) #adding empty column to segdata for abundance
segdata$phat <- rep(1)
for (i in 1:length(obsdata[, 1])) {
# the loop takes the segment label in obsdata and if it corresponds to any of the segemnt labels in segement data it pastes
# the corresponding corrected abundance value
for (j in 1:length(segdata[, 1])) {
if (obsdata$Sample.Label[i] == segdata$Sample.Label[j]) {
segdata$abund[j] <- obsdata$size.c[i]
segdata$phat[j] <- obsdata$phat[i]
}
}
}
if (sum(colnames(segdata) == "area") == 0) {
# if there is no area column in the dataset, calculating the area taking the effort and multiplying it by two times the strip
# width
possibleError <- tryCatch({segdata$area <- (segdata$Effort * w * 2)}, error = function(e) e)
if (inherits(possibleError, "error")) {
stop("Either strip.width not specified (for strip transects) or column for Effort missing (or differently named) in segment.data")
}
}
if (sum(colnames(segdata) == "pa") == 0) {
# if there is no presence-absence column (pa), calculating it
segdata$pa <- ifelse(segdata$abund > 0, 1, 0)
}
return(segdata) #the function returns the segdata dataset formatted for modelling
}
#wrap function for bootstrap
dshm_fit_4boot <- function(det.fn.par, effects.pa,effects.ab, method, lim, distdata, obsdata, segdata, grid, w.pa, w.ab, ID.pa, ID.ab, knots.pa, knots.ab, group, indices,stratification) {
# for each simulation, random sampling of the rows of original dataset according to the speciefied number
if(stratification=="none"){
id<-sample(rownames(distdata), replace = TRUE)
} else {
id<-list()
if(stratification=="stratum"){
for (j in 1:length(levels(distdata$Region.Label))){
id[[j]]<-sample(rownames(subset(distdata,distdata$Region.Label==levels(distdata$Region.Label)[j])),replace=TRUE)
}
} else if (stratification == "transect") {
for (j in 1:length(levels(distdata$Transect.Label))){
id[[j]]<-sample(rownames(subset(distdata,distdata$Transect.Label==levels(distdata$Transect.Label)[j])),replace=TRUE)
}
}
id<-do.call(what = c,id)
}
distdata <- distdata[id, ] #randomly resampling rows in distance dataset with replcement
distdata$object <- rownames(distdata) #changing object name according to resampling
if (group) {
distdata$size<-1
}
det.fn <- Distance::ds(data = distdata, transect = det.fn.par$transect, key = det.fn.par$key, adjustment = det.fn.par$adjustment, truncation = det.fn.par$truncation,
formula = det.fn.par$formula, quiet = TRUE) #fitting detection function
data <- dshm_prep(det.fn, obsdata, segdata, group) #calculating detection probabilities, correcting abundance and prepare segment dataset for analysis
data <- data[indices, ]
pa <- data$pa #presence-absence for Cc
data.ab <- subset(data, pa > 0) #presence-only abundance dataset for Cc
ab <- data.ab$abund #presence-only abundance data for Cc
ab.full <- data$abund #full abundance data for Cc
# HURDLE MODEL - presence absence----
rowsL <- length(effects.pa)
mod.sel.pa <- data.frame(mod = rep(0, rowsL), expl_Dev = rep(0, rowsL), Chisq = rep(0, rowsL), logLik = rep(0, rowsL), AICc = rep(0,
rowsL)) #empty dataframe (model selection table for pa data) to fill with model type, % explained deviance, chi-squared test, log-likelihood, and AICc for different covariate combinations
mod.sel.pa$mod <- effects.pa #specifying the single main effects as well as their combinations
mod.list.pa <- list() #empty list for fitted model in the loop below
for (i in 1:rowsL) {
# this loop fits binomial gam for each of the specifyied combinations allowing for different k
eq <- paste("pa~", effects.pa[i],"+offset(log(area))") #specifying the equations for the single main effects, the switch function allows for changing the covariates within the loop framework
gam.pa <- mgcv::gam(stats::as.formula(eq), family = stats::binomial(link = "logit"), data = data, method = method,knots = knots.pa[[i]]) #fitting the gam according to the specified equations
mod.list.pa[[i]] <- gam.pa #storing each of the fitted model in the empty list specified before
mod.sel.pa[i, 2] <- round(((gam.pa$null.deviance - gam.pa$deviance)/gam.pa$null.deviance) * 100, 2) #calculating the % of deviance explained by each model
mod.sel.pa[i, 3] <- round((1 - stats::pchisq(gam.pa$deviance, gam.pa$df.residual)), 2) #Chi-squared statistics (GOF)
mod.sel.pa[i, 4] <- round(stats::logLik(gam.pa)[1], 3) #log-likelihood
mod.sel.pa[i, 5] <- MuMIn::AICc(gam.pa) #AICc
}
mod.sel.pa <- data.frame(mod.sel.pa, deltaAICc = mod.sel.pa$AICc - min(mod.sel.pa$AICc), w = round(MuMIn::Weights(mod.sel.pa$AICc),
3)) #adding delta AICc and AIC weights to the model selection table
mod.sel.pa <- mod.sel.pa[order(mod.sel.pa$deltaAICc), ] #order the model selection table according to increasing delta AICc
best.pa <- list() #empty list for the best models in the model selection table
sub.pa <- subset(mod.sel.pa, mod.sel.pa$w >= lim) #taking the models which have a delta AIC less or equal to 4
if (length(ID.pa) > 1) {
# conditional part that decides to average models if there are more than one model in the list with the best models
eval.pa <- array(0, dim = c(length(data[, 1]), 1, length(ID.pa))) #specifying the array with number of rows equal to the length of the full dataset, 1 column, and dimensions equal to the number of models in the list with the best models
grid.pa <- array(0, dim = c(length(grid[, 1]), 1, length(ID.pa)))
for (i in 1:length(ID.pa)) {
# loop going through all models in the list, extracting the fitted values, multiplying them by each model weight, and storing
# the resulting vectors into the array
best.pa[i] <- mod.list.pa[ID.pa[i]]
eval.pa[, , i] <- stats::predict(mod.list.pa[[ID.pa[i]]], newdata = data, type = "response") * (w.pa[ID.pa[i]]/sum(w.pa[ID.pa])) #multiplying fitted values by weights
grid.pa[, , i] <- stats::predict(mod.list.pa[[ID.pa[i]]], newdata = grid, type = "response") * (w.pa[ID.pa[i]]/sum(w.pa[ID.pa]))
}
fit.w.pa <- apply(eval.pa, 1, sum) #sum across the array dimension to get final averaged fitted values
grid.w.pa <- apply(grid.pa, 1, sum)
} else {
# conditional part if there is only one model in the list, no averaging
best.pa <- mod.list.pa[ID.pa[1]]
fit.w.pa <- stats::predict(mod.list.pa[[ID.pa[1]]], newdata = data, type = "response") #taking fitted values of the model
grid.w.pa <- stats::predict(mod.list.pa[[ID.pa[1]]], newdata = grid, type = "response")
} #end of conditional part
# HURDLE MODEL - abundance----
mod.sel.ab <- data.frame(mod = rep(0, rowsL), expl_Dev = rep(0, rowsL), Chisq = rep(0, rowsL), logLik = rep(0, rowsL), AICc = rep(0,
rowsL)) #empty dataframe (model selection table for ab data) to fill with model type, % explained deviance, chi-squared test, log-likelihood, and AICc for different covariate combinations
mod.sel.ab$mod <- effects.ab #specifying the single main effects as well as their combinations
mod.list.ab <- list() #empty list for fitted model in the loop below
for (i in 1:rowsL) {
# this loop fits gam for each of the specifyied combinations allowing for different distributions and k
eq <- paste("ab~", effects.ab[i],"+offset(log(phat*area))") #specifying the equations for the single main effects, the switch function allows for changing the covariates within the loop framework
gam.ab <- mgcv::gam(stats::as.formula(eq), family = countreg::ztpoisson(), data = data.ab, method = method,knots = knots.ab[[i]]) #fitting the gam according to the specified equations
mod.list.ab[[i]] <- gam.ab #storing each of the fitted model in the empty list specified before
mod.sel.ab[i, 2] <- round(((gam.ab$null.deviance - gam.ab$deviance)/gam.ab$null.deviance) * 100, 2) #calculating the % of deviance explained by each model
mod.sel.ab[i, 3] <- round((1 - stats::pchisq(gam.ab$deviance, gam.ab$df.residual)), 2) #Chi-squared statistics (GOF)
mod.sel.ab[i, 4] <- round(stats::logLik(gam.ab)[1], 2) #log-likelihood
mod.sel.ab[i, 5] <- MuMIn::AICc(gam.ab) #AICc
}
mod.sel.ab <- data.frame(mod.sel.ab, deltaAICc = mod.sel.ab$AICc - min(mod.sel.ab$AICc), w = round(MuMIn::Weights(mod.sel.ab$AICc),
3)) #adding delta AICc and AIC weights to the model selection table
mod.sel.ab <- mod.sel.ab[order(mod.sel.ab$deltaAICc), ] #order the model selection table according to increasing delta AICc
best.ab <- list() #empty list for the best models in the model selection table
sub.ab <- subset(mod.sel.ab, mod.sel.ab$w >= lim) #taking the models which have a delta AIC less or equal to 4
if (length(ID.ab) > 1) {
# conditional part that decides to average models if there are more than one model in the list with the best models
eval.ab <- array(0, dim = c(length(data.ab[, 1]), 1, length(ID.ab))) #specifying the array with number of rows equal to the length of the presence-only abundance dataset, 1 column, and dimensions equal to the number of models in the list with the best models
eval.ab.full <- array(0, dim = c(length(data[, 1]), 1, length(ID.ab))) #specifying the array with number of rows equal to the length of the full dataset, 1 column, and dimensions equal to the number of models in the list with the best models
grid.ab <- array(0, dim = c(length(grid[, 1]), 1, length(ID.ab)))
for (i in 1:length(ID.ab)) {
# loop going through all models in the list, extracting the fitted values, multiplying them by each model weight, and storing
# the resulting vectors into the array
best.ab[[i]] <- mod.list.ab[[ID.ab[i]]] #storing each best model into a new list
eval.ab[, , i] <- stats::predict(mod.list.ab[[ID.ab[i]]], newdata = data.ab, type = "response") * (w.ab[ID.ab[i]]/sum(w.ab[ID.ab])) #multiplying fitted values by weights
eval.ab.full[, , i] <- stats::predict(mod.list.ab[[ID.ab[i]]], newdata = data, type = "response") * (w.ab[ID.ab[i]]/sum(w.ab[ID.ab])) #multiplying predicted values on the full dataset by weights
grid.ab[, , i] <- stats::predict(mod.list.ab[[ID.ab[i]]], newdata = data.frame(grid,phat=1), type = "response") * (w.ab[ID.ab[i]]/sum(w.ab[ID.ab]))
}
fit.w.ab <- apply(eval.ab, 1, sum) #sum across the array dimension to get final averaged fitted values (presence-only abundance)
fit.w.ab.full <- apply(eval.ab.full, 1, sum) #sum across the array dimension to get final averaged fitted values (full dataset, Hurdle model evaluation)
grid.w.ab <- apply(grid.ab, 1, sum)
} else {
# conditional part if there is only one model in the list, no averaging
best.ab <- mod.list.ab[ID.ab[1]]
fit.w.ab <- stats::predict(mod.list.ab[[ID.ab[1]]], newdata = data.ab, type = "response") #taking fitted values of the model
fit.w.ab.full <- stats::predict(mod.list.ab[[ID.ab[1]]], newdata = data, type = "response") #taking predicted values (full dataset) of the model
grid.w.ab <- stats::predict(mod.list.ab[[ID.ab[1]]], newdata = data.frame(grid,phat=1), type = "response")
} #end of conditional part
eval.H <- ifelse(ab.full==0,fit.w.pa,fit.w.ab.full * fit.w.pa) #multiplying pa with ab (full dataset) predictions (HURDLE)
grid.H <- grid.w.ab * grid.w.pa
return(c(grid.H, eval.H, ab.full))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.