#' Estimating range shifts
#'
#' Estimation and helper functions for nls fit of migration model
#'
#' @details This algorithm minimizes the square of the distance of the locations
#' from a double-headed hockeystick curve, then estimates the times scale using the ARMA/AR models.
#' Confidence intervals are obtained by bootstrapping the data and reestimating.
#' See example and vignette for implementation.
#'
#' @param T time
#' @param n.clust the number of ranges to estimate. Two is relatively easy and robust,
#' and three works fairly will (with good initial guesses). More can be prohibitively slow.
#' @param p.m0 initial parameter guesses - a named vector with (e.g.) elements x1, x2, y1, y2, t1, dt.
#' It helps if this is close - the output of \code{\link{quickfit}} can be helpful, as can plotting
#' the curve and using \code{\link{locator}}. If left as NULL, the function will make some guesses
#' for you - starting with \code{quickfit}.
#' @param dt0 initial guess for duration of migration
#' @param method one of `ar` or `like` (case insenstive), whether or not to use the AR equivalence
#' method (faster, needs regular data - with some tolerance for gaps) or Likelihood method, which is
#' slower but robust for irregular data.
#' @param CI whether or not to estimate confidence intervals
#' @param nboot number of bootstraps
#' @param X x coordinate
#' @param Y y coordinate
#' @param area.direct passed as direct argument to getArea
#' @param model one of "MWN", "MOU" or "MOUF" (case insensitive). By default, the algorithm selects
#' the best one according to AIC using the \code{\link{selectModel}} function.
#'
#' @return a list with the following elements
#' \item{T,X,Y}{Longitude coordinate with NA at prediction times}
#' \item{p.hat}{Point estimates of parameters}
#' \item{p.CI}{Data frame of parameter estimates with (approximate) confidence intervals.}
#' \item{model}{One of "wn", "ou" or "ouf" - the selected model for the residuals.}
#' \item{hessian}{The hessian of the mean parameters.}
#'
#' @example ./demo/estimate_shift_example.r
#' @export
estimate_shift <- function(T, X, Y, n.clust = 2,
p.m0 = NULL, dt0=min(5, diff(range(T))/20),
method = c("ar","like")[1], CI=TRUE,
nboot = 100, model = NULL,
time.units = "day",
area.direct = NULL)
{
method <- tolower(method)
n <- length(T)
hessian <- NULL
use.quickfit <- FALSE
if("POSIXt" %in% is(T) | "Date" %in% is(T)){
T.POSIX <- T
T <- difftime(T, T[1], units = time.units) %>% as.numeric
is.POSIX <- TRUE
} else is.POSIX <- FALSE
if(is.null(p.m0)){
p.m0 <- try(quickfit(T,X,Y, dt=dt0, n.clust = n.clust, plotme=FALSE))
use.quickfit <- TRUE
}
if(inherits(p.m0, "try-error")){
warning("Having a hard time guessing initial centroids using k-means clustering. \n
I'm going to take a wild guess, but you should consider providing guesses.\n")
if(n.clust == 2)
p.m0 <- c(t1 = round((T[n]-T[1])*0.5+T[1]), dt = mean(diff(T))*2, x1 = X[1], y1 = Y[1], x2 = X[n], y2 = Y[n])
if(n.clust == 3)
p.m0 <- c(t1 = round((T[n]-T[1])*0.3+T[1]),
t2 = round((T[n]-T[1])*0.6+T[1]),
dt1 = mean(diff(T))*2,
dt2 = mean(diff(T))*2,
x1 = X[1], y1 = Y[1],
x2 = X[n], y2 = Y[n])
if(n.clust > 3) stop("Scratch that. You're going to have to provide some decent guesses\n
if you really want to fit more than 3 ranges at once.")
}
##################################################
# obtain nls fit and residuals
##################################################
# mean parameters
if(n.clust == 2) FUN <- getMu else if(n.clust > 2) FUN <- getMu_multi
p.mu.fit <- getP.mu(T, X, Y, p.m0, FUN = FUN, hessian = TRUE)
p.mu.hat <- p.mu.fit$par
dts <- grep("dt", names(p.mu.hat))
if(length(dts) < n.clust - 1){
alldts <- paste0("dt", 1:(n.clust - 1))
dt.hats <- p.mu.hat[grep("dt",names(p.mu.hat))]
dt.present <- names(dt.hats)
dt.absent <- alldts[!(alldts %in% dt.present)]
dt.hats <- c(dt.hats, rep(0, n.clust - 1 - length(dt.hats)))
names(dt.hats) <- c(dt.present, dt.absent)
dt.hats <- dt.hats[order(names(dt.hats))]
p.mu.hat <- c(p.mu.hat[!grepl("dt", names(p.mu.hat))], dt.hats)
}
XY.hat <- FUN(T, p.mu.hat)
Z.res <- (X + 1i*Y) - (XY.hat[,1] + 1i*XY.hat[,2])
rangemodel.given <- TRUE
if(is.null(model)){
rangemodel.given <- FALSE
model <- selectModel(Z.res, T, method = method)
}
model <- tolower(model)
if(grepl("m", model)) model <- substring(model, 2)
tau.fit <- try(getTau(Z.res, T=T, model=model, method = method, CI=CI))
if(inherits(tau.fit, "try-error") & model == "ouf"){
warning("Couldn't fit OUF to the residuals. Switching to OU model.\n")
model <- "ou"
tau.fit <- try(getTau(Z.res, T=T, model=model, method = method, CI=CI))
}
if(inherits(tau.fit, "try-error") & model == "ou"){
warning("Couldn't fit OU to the residuals. Switching to WN model.\n")
model <- "wn"
tau.fit <- try(getTau(Z.res, T=T, model=model, method = method, CI=CI))
}
if(method == "ar" & model == "ouf") model <- "ou"
tau.hat <- tau.fit$tau.hat
ll <- tau.fit$ll
aic <- computeAIC(ll, model, n.clust)
# compile results
p.hat <- c(tau.hat, p.mu.hat)
A <- getArea(p.hat,T,X,Y, model=model, direct = area.direct)
if(is.POSIX){
t.hat <- p.hat[paste0("t", 1:(n.clust-1))]
t.hat.POSIX <- min(T.POSIX) + lubridate::duration(t.hat, units = time.units)
names(t.hat.POSIX) <- names(t.hat)
t.start <- t.hat.POSIX
dts <- p.hat[grepl("dt", names(p.hat))]
t.end <- t.start + lubridate::duration(dts, units = time.units)
shifttimes.POSIX <- data.frame(start = lubridate::round_date(t.start, time.units),
end = lubridate::round_date(t.end, time.units),
duration = round(t.end - t.start))
} else shifttimes.POSIX = NULL
mrsa.fit <- list(p.hat = c(A = A$A.hat, p.hat),
shifttimes.POSIX = shifttimes.POSIX,
model = model,
ll=c(ll=ll), aic = aic['aic'], df = aic['df'],
method = method,
n.clust = n.clust,
rangemodel.given = rangemodel.given,
p.m0 = p.m0,
use.quickfit = use.quickfit,
X=X, Y=Y, T=T,
Z.res=Z.res,
is.POSIX = is.POSIX)
if(is.POSIX){
mrsa.fit$T.POSIX = T.POSIX
mrsa.fit$time.units = time.units
}
if(CI) {
# obtain mean parameters
hessian <- p.mu.fit$hessian
# hideous, hideous kludge to fix occasinal, inexplicable, enormous CI's for time related estimates
if(n.clust == 2)
p.mu.sd <- try(sqrt(1/diag(hessian))) else {
#try(sqrt(diag(solve(hessian)))) else {
which.xy <- c(grep("x", colnames(hessian)), grep("y", colnames(hessian)))
which.t <- grep("t", colnames(hessian))
p.mu.sd <- try(sqrt( c(diag(solve(hessian[which.xy, which.xy])), 1/diag(hessian[which.t,which.t]))))
}
if(inherits(p.mu.sd, "try-error")){
warning(cat("Can't obtain some confidence intervals (probably the migration time is too fast).\n"))
p.mu.sd <- p.mu.hat * NA
zeros <- which(diag(hessian) == 0)
p.mu.sd[-zeros] <- sqrt(diag(solve(hessian[-zeros,-zeros])))
}
if(any(is.na(p.mu.sd))){
warning(cat("Can't obtain some confidence intervals - most likely because the migration time is too fast.\n"))
p.mu.sd[is.na(p.mu.sd)] <- 0
}
# check to see if all dt's are available
p.mu.sd.df <- gtools::smartbind(data.frame(t(p.mu.hat)),
data.frame(t(p.mu.sd)))[2,]
p.mu.sd <- as.vector(t(p.mu.sd.df))
names(p.mu.sd) <- names(p.mu.sd.df)
# obtain variance parameters
tau.bs <- matrix(ncol = length(tau.hat), nrow = nboot)
if(model == "wn") tau.withCI <- NULL
if(model == "ou" | model == "ouf"){
if(method == "ar"){
count.errors <- 0
cat("Using bootstrap for confidence intervals around the variance parameters.\n")
for(i in 1:nboot){
sample <- sample(1:n, replace=TRUE) %>% sort %>% unique
tau.fit <- try(getTau(Z.res = Z.res[sample], T = T[sample],
tau0 = tau.hat, model = model, method="ar"), silent = TRUE)
if(inherits(tau.fit, "try-error")){
tau.bs[i,] <- rep(0, length(tau.hat))
count.errors <- count.errors + 1
}
else tau.bs[i,] <- tau.fit$tau.hat
}
tau.withCI <- apply(t(t(tau.bs)), 2, quantile, prob = c(0.5,0.025, 0.975), na.rm=TRUE) %>% data.frame
names(tau.withCI) <- names(tau.hat)
tau.withCI <- t(tau.withCI)
if(count.errors >= 10)
cat("Several bootstrapped samples failed to estimate time-scale parameters. Consider trying a simpler model.\n")
}
if(method == "like"){
tau.CI <- tau.fit$tau.CI
if(model == "ou") {tau.withCI <- data.frame(tau.z = c(tau.hat, tau.CI$tau.z)) %>% t} else
tau.withCI <- cbind(tau.hat, t(tau.CI))
}
}
p.mu.CI <- cbind(p.mu.hat, p.mu.hat - 1.96*p.mu.sd, p.mu.hat + 1.96*p.mu.sd)
p.CI <- rbind(A = c(A$A.hat, A$A.hat + c(-1.96,1.96) * A$A.se),
tau.withCI,
p.mu.CI) %>% data.frame
names(p.CI) <- c("p.hat", "CI.low", "CI.high")
# truncate negative dt's
dts <- grep("dt", row.names(p.CI))
p.CI[dts,]$CI.low <- pmax(0,p.CI[dts,]$CI.low)
mrsa.fit$p.CI <- p.CI
}
class(mrsa.fit) <- "shiftfit"
return(mrsa.fit)
}
#' @export
print.shiftfit <- function(x, ...){
# x is the fit
with(x, {
cat("Range shift estimation:\n\n")
n <- length(X)
cat(paste0(n, " observations over time range ", min(T), " to ", max(T),".\n"))
if(n.clust == 2)
cat(paste0(n.clust, " ranges (one shift) fitted using the ", toupper(method), " method.\n\n")) else
cat(paste0(n.clust, " ranges (", n.clust-1, " shifts) fitted using the ", toupper(method), " method.\n"))
if(use.quickfit){
cat("Initial shift parameters obtained from `quickfit`:\n")
print(signif(p.m0, 4), ...)}
if(!rangemodel.given)
cat(paste0("The AIC-selected ranging model is ", toupper(model), ".\n\n")) else
cat(paste0("The fitted ranging model is ", toupper(model), ".\n\n"))
cat("Parameter Estimates:\n")
if(!("p.CI" %in% names(x))) print(t(p.hat),...) else print(p.CI,...)
if(x$is.POSIX){
cat("\nEstimates of range-shift timing:\n")
print(x$shifttimes.POSIX)
}
cat(paste0("\nlog-likelihood: ", signif(ll,4), "; AIC: ", signif(aic,4), "; degrees of freedom: ", signif(df,4), "\n"))
})
invisible(x)
}
#' @export
summary.shiftfit <- function(object,...){print.shiftfit(object,...); invisible(object)}
#' @export
AIC.shiftfit <- function(object,...) object$aic
#' @export
logLik.shiftfit <- function(object,...) object$ll
#' @export
computeAIC <- function(ll, model, n.clust){
if(model %in% c("mouf","ouf")) k.s <- 3 else
if(model %in% c("mou","ou")) k.s <- 2 else
if(model %in% c("mwn", "wn")) k.s <- 1
k.m <- n.clust * 2 + (n.clust - 1) * 2
return(c(aic = -2*ll + 2*(k.s + k.m), df = k.s + k.m))
}
#' @export
getP.mu <- function(T,X,Y,p.m0, FUN = getMu, hessian = TRUE, bounds = FALSE){
getL.nls <- function(p.m, T, X, Y, FUN){
Mu <- FUN(T, p.m)
sum(c(X - Mu[,1], Y - Mu[,2])^2)
}
xs <- grep("x", names(p.m0))
ys <- grep("y", names(p.m0))
dts <- grep("dt", names(p.m0))
ts <- (1:length(p.m0))[-c(xs,ys,dts)]
fit <- try(optim(p.m0, getL.nls, T=T, X=X, Y=Y, hessian=hessian, FUN = FUN))
if(any(fit$par[dts] < 0)){
names(which(fit$par[dts] < 0))
p.m02 <- p.m0
p.m02 <- p.m0[c(xs, ys, ts, dts[fit$par[dts] > 0])]
fit <- try(optim(p.m02, getL.nls, T=T, X=X, Y=Y, hessian=hessian, FUN = FUN))
}
fit
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.