R/predict.R

Defines functions raster_from_predict newdata_from_raster drop_final_year clean_up_covs check_type make_mod_matrix

# General predict method-------------------------------------------------------

# Common predict function for all fit types
# with exception of occuMulti and occuMS (at the end of this file)
setMethod("predict", "unmarkedFit",
  function(object, type, newdata, backTransform = TRUE, na.rm = TRUE,
           appendData = FALSE, level=0.95, re.form=NULL, ...){

  # If no newdata, get actual data
  if(missing(newdata) || is.null(newdata)) newdata <- object@data

  # Check inputs
  check_predict_arguments(object, type, newdata)

  # Get model matrix (X) and offset
  # If newdata is an unmarkedFrame, use getDesign via predict_inputs_from_umf()
  is_raster <- FALSE
  if(inherits(newdata, "unmarkedFrame")){
    # Generate model matrix and offsets
    pred_inps <- predict_inputs_from_umf(object, type, newdata, na.rm, re.form)
  } else {
    # If newdata is provided
    # 1. Get original data and appropriate formula for type
    orig_data <- get_orig_data(object, type)
    orig_formula <- get_formula(object, type)

    # 2. If newdata is raster, get newdata from raster as data.frame
    if(inherits(newdata, c("RasterLayer","RasterStack","SpatRaster"))){
      is_raster <- TRUE
      orig_raster <- newdata
      newdata <- newdata_from_raster(newdata, all.vars(orig_formula))
    }

    # 3. Make model matrix and offset with newdata, informed by original data
    pred_inps <- make_mod_matrix(orig_formula, orig_data, newdata, re.form)
  }

  # Calculate predicted values in chunks (for speed) based on X and offset
  out <- predict_by_chunk(object, type, level, pred_inps$X, pred_inps$offset,
                          chunk_size = 70, backTransform, re.form)

  # Convert output to raster if newdata was raster
  if(is_raster){
    out <- raster_from_predict(out, orig_raster, appendData)
  } else if(appendData){
    # Append data if needed
    out <- data.frame(out, as(newdata, "data.frame"))
  }

  out

})

# Function to make model matrix and offset from formula, newdata and original data
# This function makes sure factor levels in newdata match, and that
# any functions in the formula are handled properly (e.g. scale)
make_mod_matrix <- function(formula, data, newdata, re.form=NULL){
  form_nobars <- lme4::nobars(formula)
  mf <- model.frame(form_nobars, data, na.action=stats::na.pass)
  X.terms <- stats::terms(mf)
  fac_cols <- data[, sapply(data, is.factor), drop=FALSE]
  xlevs <- lapply(fac_cols, levels)
  xlevs <- xlevs[names(xlevs) %in% names(mf)]
  nmf <- model.frame(X.terms, newdata, na.action=stats::na.pass, xlev=xlevs)
  #X <- model.matrix(X.terms, newdata, xlev=xlevs)
  X <- model.matrix(form_nobars, nmf)
  offset <- model.offset(nmf)
  if(is.null(re.form) & !is.null(lme4::findbars(formula))){
    Z <- get_Z(formula, data, newdata)
    X <- cbind(X, Z)
  }
  list(X=X, offset=offset)
}


# Fit-type specific methods----------------------------------------------------

# Fit type-specific methods to generate different components of prediction
# 1. check_predict_arguments(): Check arguments
# 2. predict_inputs_from_umf(): Generating inputs from an unmarked
#    frame (e.g. when no newdata) using getDesign
# 3. get_formula: Get formula for submodel type
# 4. get_orig_data(): Get original dataset for use in building model frame
# 5. predict_by_chunk(): Take inputs and generate predictions
# Basic methods are shown below; fit type-specific methods in their own sections

setGeneric("check_predict_arguments", function(object, ...){
  standardGeneric("check_predict_arguments")
})

setMethod("check_predict_arguments", "unmarkedFit",
  function(object, type, newdata, ...){
  # Check if type is supported (i.e., is it in names(object)?)
  check_type(object, type)

  # Check newdata class
  if(!inherits(newdata, c("unmarkedFrame", "data.frame", "RasterLayer", "RasterStack", "SpatRaster"))){
    stop("newdata must be unmarkedFrame, data.frame, RasterLayer, RasterStack, or SpatRaster", call.=FALSE)
  }
  invisible(TRUE)
})

# Check if predict type is valid
check_type <- function(mod, type){
  opts <- names(mod)
  if(type %in% opts) return(invisible(TRUE))
  stop("Valid types are ", paste(opts, collapse=", "), call.=FALSE)
}

# Get X and offset when newdata is umf
setGeneric("predict_inputs_from_umf", function(object, ...){
  standardGeneric("predict_inputs_from_umf")
})

setMethod("predict_inputs_from_umf", "unmarkedFit",
  function(object, type, newdata, na.rm, re.form){
  designMats <- getDesign(newdata, object@formula, na.rm = na.rm)
  if(type == "state") list_els <- c("X","Z_state","X.offset")
  if(type == "det") list_els <- c("V","Z_det","V.offset")

  X <- designMats[[list_els[1]]]
  if(is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]])
  offset <- designMats[[list_els[3]]]

  list(X=X, offset=offset)
})

# Get correct individual formula based on type
setGeneric("get_formula", function(object, type, ...){
  standardGeneric("get_formula")
})

setMethod("get_formula", "unmarkedFit", function(object, type, ...){
  if(type == "state"){
    return(as.formula(paste("~", object@formula[3], sep="")))
  } else if(type == "det"){
    return(as.formula(object@formula[[2]]))
  }
  NULL
})

# When newdata is data.frame/raster, get original dataset
# For use in building correct model frame
setGeneric("get_orig_data", function(object, type, ...){
  standardGeneric("get_orig_data")
})

# Note that by default, final year of yearlySiteCov data at each site is dropped
# Because transition probabilities are not estimated for final year
# this is appropriate for dynamic models but not temporary emigration models
# for which the drop_final should be FALSE
setMethod("get_orig_data", "unmarkedFit", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=TRUE)
  datatype <- switch(type, state='site_covs', det='obs_covs')
  clean_covs[[datatype]]
})

# Convert NULL data frames to dummy data frames of proper dimension
# Add site covs to yearlysitecovs, ysc to obs covs, etc.
# Drop final year of ysc if necessary
clean_up_covs <- function(object, drop_final=FALSE){
  M <- numSites(object)
  R <- obsNum(object)
  T <- 1
  J <- R
  is_mult <- methods::.hasSlot(object, "numPrimary")
  if(is_mult){
    T <- object@numPrimary
    J <- R/T
  }

  sc <- siteCovs(object)
  if(is.null(sc)) sc <- data.frame(.dummy=rep(1,M))
  out <- list(site_covs=sc)

  if(is_mult){
    ysc <- yearlySiteCovs(object)
    if(is.null(ysc)) ysc <- data.frame(.dummy2=rep(1,M*T))
    ysc <- cbind(ysc, sc[rep(1:M, each=T),,drop=FALSE])
  }

  if(methods::.hasSlot(object, "obsCovs")){
    oc <- obsCovs(object)
    if(is.null(oc)) oc <- data.frame(.dummy3=rep(1,M*T*J))
    if(is_mult){
      oc <- cbind(oc, ysc[rep(1:(M*T), each=J),,drop=FALSE])
    } else {
      oc <- cbind(oc, sc[rep(1:M, each=J),,drop=FALSE])
    }
    out$obs_covs=oc
  }

  if(is_mult){
    if(drop_final & (T > 1)){
      # Drop final year of data at each site
      # Also drop factor levels only found in last year of data
      ysc <- drop_final_year(ysc, M, T)
    }
    out$yearly_site_covs <- ysc
  }

  out
}

#Remove data in final year of yearlySiteCovs (replacing with NAs)
#then drop factor levels found only in that year
drop_final_year <- function(dat, nsites, nprimary){
  dat[seq(nprimary, nsites*nprimary, by=nprimary), ] <- NA
  dat <- lapply(dat, function(x) x[,drop = TRUE])
  as.data.frame(dat)
}


# Take inputs (most importantly model matrix and offsets) and generate prediction
# done in chunks for speed, 70 was optimal after tests
setGeneric("predict_by_chunk", function(object, ...){
  standardGeneric("predict_by_chunk")
})

setMethod("predict_by_chunk", "unmarkedFit",
  function(object, type, level, xmat, offsets, chunk_size, backTransform=TRUE,
           re.form=NULL, ...){

  if(is.vector(xmat)) xmat <- matrix(xmat, nrow=1)
  nr <- nrow(xmat)
  ind <- rep(1:ceiling(nr/chunk_size), each=chunk_size, length.out=nr)

  # should find a way to keep xmat sparse, but it doesn't
  # work with linearComb
  #x_chunk <- lapply(split(as.data.frame(xmat), ind), as.matrix)
  x_chunk <- lapply(unique(ind),
                    function(i) as.matrix(xmat[ind==i,,drop=FALSE]))

  if(is.null(offsets)) offsets <- rep(0, nr)
  off_chunk <- split(offsets, ind)
  out <- mapply(function(x_i, off_i){
    has_na <- apply(x_i, 1, function(x_i) any(is.na(x_i)))
    # Work around linearComb bug where there can't be NAs in inputs
    x_i[has_na,] <- 0
    off_i[has_na] <- 0
    lc <- linearComb(object, x_i, type, offset=off_i, re.form=re.form)
    if(backTransform) lc <- backTransform(lc)
    out <- data.frame(Predicted=coef(lc))
    if(!is.null(level)){
      se <- SE(lc)
      ci <- confint(lc, level=level)
      out$SE <- se
      out$lower <- ci[,1]
      out$upper <- ci[,2]
    }
    out[has_na,] <- NA
    out
    }, x_chunk, off_chunk, SIMPLIFY=FALSE)
  out <- do.call(rbind, out)
  rownames(out) <- NULL
  out
})


# Raster handling functions----------------------------------------------------

# Convert a raster into a data frame to use as newdata
newdata_from_raster <- function(object, vars){
  if(inherits(object, "Raster")){
    if(!requireNamespace("raster", quietly=TRUE)) stop("raster package required", call.=FALSE)
    nd <- raster::as.data.frame(object)
    # Handle factor rasters
    is_fac <- raster::is.factor(object)
    rem_string <- paste(paste0("^",names(object),"_"), collapse="|")
    names(nd)[is_fac] <- gsub(rem_string, "", names(nd)[is_fac])
  } else if(inherits(object, "SpatRaster")){
    if(!requireNamespace("terra", quietly=TRUE)) stop("terra package required", call.=FALSE)
    nd <- terra::as.data.frame(object)
  }
  # Check if variables are missing
  no_match <- vars[! vars %in% names(nd)]
  if(length(no_match) > 0){
    stop(paste0("Variable(s) ",paste(no_match, collapse=", "), " not found in raster(s)"),
         call.=FALSE)
  }
  return(nd)
}

raster_from_predict <- function(pr, object, appendData){
  if(inherits(object, "Raster")){
    new_rast <- data.frame(raster::coordinates(object), pr)
    new_rast <- raster::stack(raster::rasterFromXYZ(new_rast))
    raster::crs(new_rast) <- raster::crs(object)
    if(appendData) new_rast <- raster::stack(new_rast, object)
  } else if(inherits(object, "SpatRaster")){
    new_rast <- data.frame(terra::crds(object), pr)
    new_rast <- terra::rast(new_rast, type="xyz")
    terra::crs(new_rast) <- terra::crs(object)
    if(appendData) new_rast <- c(new_rast, object)
  }
  new_rast
}


# pcount methods---------------------------------------------------------------

setMethod("check_predict_arguments", "unmarkedFitPCount",
  function(object, type, newdata, ...){
  if(type %in% c("psi", "alpha")){
    stop(paste0(type, " is scalar. Use backTransform instead."), call.=FALSE)
  }
  methods::callNextMethod(object, type, newdata)
})

# Special predict approach for ZIP distribution in pcount
# All other distributions use default method
setMethod("predict_by_chunk", "unmarkedFitPCount",
  function(object, type, level, xmat, offsets, chunk_size, backTransform=TRUE,
           re.form=NULL, ...){
  if(type == "state" & object@mixture == "ZIP"){

    out <- data.frame(matrix(NA, nrow(xmat), 4))
    names(out) <- c("Predicted", "SE", "lower", "upper")

    psi.hat <- plogis(coef(object, type="psi"))
    lamEst <- object["state"]
    psiEst <- object["psi"]
    fixedOnly <- !is.null(re.form)
    lam.mle <- coef(lamEst, fixedOnly=fixedOnly)
    lam_vcov <- vcov(lamEst, fixedOnly=fixedOnly)
    if(is.null(offsets)) offsets <- rep(0, nrow(xmat))

    for(i in 1:nrow(xmat)) {
      if(nrow(xmat) > 5000) {
          if(i %% 1000 == 0)
              cat("  doing row", i, "of", nrow(xmat), "\n")
      }
      if(any(is.na(xmat[i,]))) next
      ## for the ZIP model the predicted values on the log scale have us
      ## add log(1-psi.hat) to the normal linear prediction
      out$Predicted[i] <-   xmat[i,] %*% lam.mle + offsets[i] + log(1 - psi.hat)
      ## to compute the approximate SE, I compute the variance of the usual
      ## linear part -- that is easy, and to that I add the variance of
      ## log(1-psi.hat) obtained by the delta approximation
      logit.psi<-coef(object,type="psi")
      #  To do that I took derivative of log(1-psi.hat) using application
      #  of chain rule.... hopefully correctly.
      delta.approx.2ndpart<-   ( ((1/(1-psi.hat))*(exp(logit.psi)/((1+exp(logit.psi))^2)))^2 ) * (SE(psiEst)^2)
      ## now the SE is the sqrt of the whole thing
      out$SE[i]<- sqrt( t(xmat[i,])%*% lam_vcov %*%xmat[i,] + delta.approx.2ndpart   )

      #From Mike Meredith
      alf <- (1 - level) / 2
      crit<-qnorm(c(alf, 1 - alf))
      ci <- out$Predicted[i] + crit * out$SE[i]
      out$lower[i]<- ci[1]
      out$upper[i]<- ci[2]
      if(backTransform){
        out$Predicted[i] <- exp(out$Predicted[i])
        ### If back-transform, delta approx says var = (exp(linear.predictor)^2)*Var(linear.predictor)
        ### also I exponentiate the confidence interval.....
        out$SE[i]<- out$Predicted[i]*out$SE[i]
        ci<-exp(ci)
      }
      out$lower[i] <- ci[1]
      out$upper[i] <- ci[2]
    }
    return(out)
  }
  methods::callNextMethod(object, type, level, xmat, offsets, chunk_size,
                          backTransform, re.form, ...)
})


# colext methods---------------------------------------------------------------

setMethod("predict_inputs_from_umf", "unmarkedFitColExt",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formula, na.rm = na.rm)
  list_el <- switch(type, psi="W", col="X.gam", ext="X.eps", det="V")
  # colext doesn't support offsets
  list(X=designMats[[list_el]], offset=NULL)
})

setMethod("get_formula", "unmarkedFitColExt", function(object, type, ...){
  switch(type, psi=object@psiformula, col=object@gamformula,
                     ext=object@epsformula, det=object@detformula)
})

setMethod("get_orig_data", "unmarkedFitColExt", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=TRUE)
  datatype <- switch(type, psi='site_covs', col='yearly_site_covs',
                     ext='yearly_site_covs', det='obs_covs')
  clean_covs[[datatype]]
})


# occuFP methods---------------------------------------------------------------

setMethod("predict_inputs_from_umf", "unmarkedFitOccuFP",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@detformula, object@FPformula,
                          object@Bformula, object@stateformula, na.rm=na.rm)
  X_idx <- switch(type, state="X", det="V", fp="U", b="W")
  off_idx <- paste0(X_idx, ".offset")
  list(X=designMats[[X_idx]], offset=designMats[[off_idx]])
})

setMethod("get_formula", "unmarkedFitOccuFP", function(object, type, ...){
  switch(type, state=object@stateformula, det=object@detformula,
         b=object@Bformula, fp=object@FPformula)
})

setMethod("get_orig_data", "unmarkedFitOccuFP", function(object, type, ...){
  # Get obs data if fp, b, or det
  new_type <- ifelse(type %in% c("fp", "b"), "det", type)
  methods::callNextMethod(object, new_type, ...)
})


# Dail-Madsen model methods----------------------------------------------------

# Includes unmarkedFitPCO, unmarkedFitMMO, unmarkedFitDSO

setMethod("check_predict_arguments", "unmarkedFitDailMadsen",
  function(object, type, newdata, ...){
  if(type %in% c("psi", "alpha", "scale")){
    stop(paste0(type, " is scalar. Use backTransform instead."), call.=FALSE)
  }
  dynamics <- object@dynamics
  immigration <- tryCatch(object@immigration, error=function(e) FALSE)
  if(identical(dynamics, "notrend") & identical(type, "gamma"))
    stop("gamma is a derived parameter for this model: (1-omega)*lambda")
  if(identical(dynamics, "trend") && identical(type, "omega"))
    stop("omega is not a parameter in the dynamics='trend' model")
  if(!immigration && identical(type, "iota"))
    stop("iota is not a parameter in the immigration=FALSE model")
  methods::callNextMethod(object, type, newdata)
})

setMethod("predict_inputs_from_umf", "unmarkedFitDailMadsen",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formula, na.rm=na.rm)
  X_idx <- switch(type, lambda="Xlam", gamma="Xgam", omega="Xom",
                  iota="Xiota", det="Xp")
  off_idx <- paste0(X_idx, ".offset")
  list(X=designMats[[X_idx]], offset=designMats[[off_idx]])
})

setMethod("get_formula", "unmarkedFitDailMadsen", function(object, type, ...){
  fl <- object@formlist
  switch(type, lambda=fl$lambdaformula, gamma=fl$gammaformula,
         omega=fl$omegaformula, iota=fl$iotaformula, det=fl$pformula)
})

setMethod("get_orig_data", "unmarkedFitDailMadsen", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=TRUE)
  datatype <- switch(type, lambda='site_covs', gamma='yearly_site_covs',
                     omega='yearly_site_covs', iota='yearly_site_covs',
                     det='obs_covs')
  clean_covs[[datatype]]
})

# This method differs for DSO
setMethod("get_orig_data", "unmarkedFitDSO", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=TRUE)
  datatype <- switch(type, lambda='site_covs', gamma='yearly_site_covs',
                     omega='yearly_site_covs', iota='yearly_site_covs',
                     det='yearly_site_covs')
  clean_covs[[datatype]]
})

# Special handling for ZIP distribution
setMethod("predict_by_chunk", "unmarkedFitDailMadsen",
  function(object, type, level, xmat, offsets, chunk_size, backTransform=TRUE,
           re.form=NULL, ...){
  if(type == "lambda" & object@mixture == "ZIP"){
    warning("Method to compute SE for ZIP model has not been written", call.=FALSE)
    out <- data.frame(matrix(NA, nrow(xmat), 4))
    names(out) <- c("Predicted", "SE", "lower", "upper")
    lam.mle <- coef(object, type="lambda")
    psi.hat <- plogis(coef(object, type="psi"))
    if(is.null(offsets)) offsets <- rep(0, nrow(xmat))
    out$Predicted <- as.numeric(xmat %*% lam.mle + offsets + log(1 - psi.hat))
    if(backTransform) out$Predicted <- exp(out$Predicted)
    return(out)
  }
  methods::callNextMethod(object, type, level, xmat, offsets, chunk_size,
                          backTransform, re.form, ...)
})


# Temporary emigration models--------------------------------------------------

# All inherit from GMM so only one set of methods is required
# (except GDR which has its own predict method right now)

setMethod("predict_inputs_from_umf", "unmarkedFitGMM",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formula, na.rm=na.rm)
  X_idx <- switch(type, lambda="Xlam", phi="Xphi", det="Xdet")
  off_idx <- paste0(X_idx, ".offset")
  list(X=designMats[[X_idx]], offset=designMats[[off_idx]])
})

setMethod("get_formula", "unmarkedFitGMM", function(object, type, ...){
  fl <- object@formlist
  switch(type, lambda=fl$lambdaformula, phi=fl$phiformula, det=fl$pformula)
})

setMethod("get_orig_data", "unmarkedFitGMM", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
  datatype <- switch(type, lambda='site_covs', phi='yearly_site_covs',
                     det='obs_covs')
  clean_covs[[datatype]]
})


# occuTTD----------------------------------------------------------------------

# Identical to colext

setMethod("predict_inputs_from_umf", "unmarkedFitOccuTTD",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formula, na.rm = na.rm)
  list_el <- switch(type, psi="W", col="X.gam", ext="X.eps", det="V")
  list(X=designMats[[list_el]], offset=NULL)
})

setMethod("get_formula", "unmarkedFitOccuTTD", function(object, type, ...){
  switch(type, psi=object@psiformula, col=object@gamformula,
                     ext=object@epsformula, det=object@detformula)
})

setMethod("get_orig_data", "unmarkedFitOccuTTD", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=TRUE)
  datatype <- switch(type, psi='site_covs', col='yearly_site_covs',
                     ext='yearly_site_covs', det='obs_covs')
  clean_covs[[datatype]]
})


# nmixTTD----------------------------------------------------------------------

setMethod("predict_inputs_from_umf", "unmarkedFitNmixTTD",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formula, na.rm = na.rm)
  list_el <- switch(type, state="W", det="V")
  list(X=designMats[[list_el]], offset=NULL)
})

setMethod("get_formula", "unmarkedFitNmixTTD", function(object, type, ...){
  switch(type, state=object@stateformula, det=object@detformula)
})

setMethod("get_orig_data", "unmarkedFitNmixTTD", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
  datatype <- switch(type, state='site_covs', det='obs_covs')
  clean_covs[[datatype]]
})


# gdistremoval-----------------------------------------------------------------

setMethod("predict_inputs_from_umf", "unmarkedFitGDR",
  function(object, type, newdata, na.rm, re.form=NA){
  designMats <- getDesign(newdata, object@formlist)
  if(type == "lambda") list_els <- c("Xlam","Zlam")
  if(type == "phi") list_els <- c("Xphi","Zphi")
  if(type == "dist") list_els <- c("Xdist","Zdist")
  if(type == "rem") list_els <- c("Xrem", "Zrem")
  X <- designMats[[list_els[1]]]
  if(is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]])
  list(X=X, offset=NULL)
})

setMethod("get_formula", "unmarkedFitGDR", function(object, type, ...){
  fl <- object@formlist
  switch(type, lambda=fl$lambdaformula, phi=fl$phiformula,
         dist=fl$distanceformula, rem=fl$removalformula)
})

setMethod("get_orig_data", "unmarkedFitGDR", function(object, type, ...){
  clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
  datatype <- switch(type, lambda='site_covs', phi='yearly_site_covs',
                     dist='yearly_site_covs', rem='obs_covs')
  clean_covs[[datatype]]
})


# occuMulti--------------------------------------------------------------------

# bespoke predict method since it has numerious unusual options
# and requires bootstrapping

# This method is used by simulate but not by predict
setMethod("get_formula", "unmarkedFitOccuMulti", function(object, type, ...){
  switch(type, state=object@stateformulas,
               det=object@detformulas)
})

setMethod("predict", "unmarkedFitOccuMulti",
     function(object, type, newdata,
              #backTransform = TRUE, na.rm = TRUE,
              #appendData = FALSE,
              se.fit=TRUE, level=0.95, species=NULL, cond=NULL, nsims=100,
              ...)
  {

  type <- match.arg(type, c("state", "det"))

  if(is.null(hessian(object))){
    se.fit = FALSE
  }

  species <- name_to_ind(species, names(object@data@ylist))
  cond <- name_to_ind(cond, names(object@data@ylist))

  if(missing(newdata)){
    newdata <- NULL
  } else {
    if(! class(newdata) %in% c('data.frame')){
      stop("newdata must be a data frame")
    }
  }

  maxOrder <- object@call$maxOrder
  if(is.null(maxOrder)) maxOrder <- length(object@data@ylist)
  dm <- getDesign(object@data,object@detformulas,object@stateformulas,
                  maxOrder, na.rm=F, newdata=newdata, type=type)

  params <- coef(object)
  low_bound <- (1-level)/2
  up_bound <- level + (1-level)/2


  if(type=="state"){
    N <- nrow(dm$dmOcc[[1]]); nF <- dm$nF; dmOcc <- dm$dmOcc;
    fStart <- dm$fStart; fStop <- dm$fStop; fixed0 <- dm$fixed0
    t_dmF <- t(dm$dmF)

    calc_psi <- function(params){

      f <- matrix(NA,nrow=N,ncol=nF)
      index <- 1
      for (i in 1:nF){
        if(fixed0[i]){
          f[,i] <- 0
        } else {
          f[,i] <- dmOcc[[index]] %*% params[fStart[index]:fStop[index]]
          index <- index + 1
        }
      }
      psi <- exp(f %*% t_dmF)
      as.matrix(psi/rowSums(psi))
    }

    psi_est <- calc_psi(params)

    if(se.fit){
      message('Bootstrapping confidence intervals with ',nsims,' samples')
      Sigma <- vcov(object)
      samp <- array(NA,c(dim(psi_est),nsims))
      for (i in 1:nsims){
        samp[,,i] <- calc_psi(mvrnorm(1, coef(object), Sigma))
      }
    }

    if(!is.null(species)){

      sel_col <- species

      if(!is.null(cond)){
        if(any(sel_col %in% abs(cond))){
          stop("Species can't be conditional on itself")
        }
        ftemp <- object@data@fDesign
        swap <- -1*cond[which(cond<0)]
        ftemp[,swap] <- 1 - ftemp[,swap]
        num_inds <- apply(ftemp[,c(sel_col,abs(cond))] == 1,1,all)
        denom_inds <- apply(ftemp[,abs(cond),drop=F] == 1,1,all)
        est <- rowSums(psi_est[,num_inds,drop=F]) /
          rowSums(psi_est[,denom_inds, drop=F])
        if(se.fit){
          samp_num <- apply(samp[,num_inds,,drop=F],3,rowSums)
          samp_denom <- apply(samp[,denom_inds,,drop=F],3,rowSums)
          samp <- samp_num / samp_denom
        }

      } else {
        num_inds <- apply(object@data@fDesign[,sel_col,drop=FALSE] == 1,1,all)
        est <- rowSums(psi_est[,num_inds,drop=F])
        if(se.fit){
          samp <- samp[,num_inds,,drop=F]
          samp <- apply(samp, 3, rowSums)
        }
      }

      if(se.fit){
        if(!is.matrix(samp)) samp <- matrix(samp, nrow=1)
        boot_se <- apply(samp,1,sd, na.rm=T)
        boot_low <- apply(samp,1,quantile,low_bound, na.rm=T)
        boot_up <- apply(samp,1,quantile,up_bound, na.rm=T)
      } else{
        boot_se <- boot_low <- boot_up <- NA
      }
      return(data.frame(Predicted=est,
                        SE=boot_se,
                        lower=boot_low,
                        upper=boot_up))

    } else {
      codes <- apply(dm$z,1,function(x) paste(x,collapse=""))
      colnames(psi_est)  <- paste('psi[',codes,']',sep='')
      if(se.fit){
        boot_se <- apply(samp,c(1,2),sd, na.rm=T)
        boot_low <- apply(samp,c(1,2),quantile,low_bound, na.rm=T)
        boot_up <- apply(samp,c(1,2),quantile,up_bound, na.rm=T)
        colnames(boot_se) <- colnames(boot_low) <- colnames(boot_up) <-
          colnames(psi_est)
      } else {
        boot_se <- boot_low <- boot_up <- NA
      }
      return(list(Predicted=psi_est,
                  SE=boot_se,
                  lower=boot_low,
                  upper=boot_up))
    }
  }

  if(type=="det"){
    S <- dm$S; dmDet <- dm$dmDet
    dStart <- dm$dStart; dStop <- dm$dStop

    out <- list()
    for (i in 1:S){
      #Subset estimate to species i
      inds <- dStart[i]:dStop[i]
      new_est <- object@estimates@estimates$det
      new_est@estimates <- coef(object)[inds]
      new_est@fixed <- 1:length(inds)
      if(se.fit){
        new_est@covMat <- vcov(object)[inds,inds,drop=FALSE]
        new_est@covMatBS <- object@covMatBS[inds,inds,drop=FALSE]
      } else{
        new_est@covMat <- matrix(NA, nrow=length(inds), ncol=length(inds))
        new_est@covMatBS <- matrix(NA, nrow=length(inds), ncol=length(inds))
      }

      prmat <- t(apply(dmDet[[i]], 1, function(x){
                    bt <- backTransform(linearComb(new_est, x))
                    if(!se.fit){
                      return(c(Predicted=bt@estimate, SE=NA, lower=NA, upper=NA))
                    }
                    ci <- confint(bt, level=level)
                    names(ci) <- c("lower", "upper")
                    c(Predicted=bt@estimate, SE=SE(bt), ci)
                  }))
      rownames(prmat) <- NULL
      out[[i]] <- as.data.frame(prmat)
    }
    names(out) <- names(object@data@ylist)
    if(!is.null(species)){
      return(out[[species]])
    }
    return(out)
  }
  stop("type must be 'det' or 'state'")
})


# occuMS-----------------------------------------------------------------------

# bespoke predict method since it requires bootstrapping

# This method is used by simulate by not by predict
setMethod("get_formula", "unmarkedFitOccuMS", function(object, type, ...){
  switch(type, psi=object@psiformulas, phi=object@phiformulas,
               det=object@detformulas)
})

setMethod("predict", "unmarkedFitOccuMS",
     function(object, type, newdata,
              #backTransform = TRUE, na.rm = TRUE,
              #appendData = FALSE,
              se.fit=TRUE, level=0.95, nsims=100, ...)
{

  #Process input---------------------------------------------------------------
  if(! type %in% c("psi","phi", "det")){
    stop("type must be 'psi', 'phi', or 'det'")
  }

  if(is.null(hessian(object))){
    se.fit = FALSE
  }

  if(missing(newdata)){
    newdata <- NULL
  } else {
    if(! class(newdata) %in% c('data.frame')){
      stop("newdata must be a data frame")
    }
  }

  S <- object@data@numStates
  gd <- getDesign(object@data,object@psiformulas,object@phiformulas,
                  object@detformulas, object@parameterization, na.rm=F,
                  newdata=newdata, type=type)

  #Index guide used to organize p values
  guide <- matrix(NA,nrow=S,ncol=S)
  guide <- lower.tri(guide,diag=TRUE)
  guide[,1] <- FALSE
  guide <- which(guide,arr.ind=TRUE)
  #----------------------------------------------------------------------------

  #Utility functions-----------------------------------------------------------
  #Get matrix of linear predictor values
  get_lp <- function(params, dm_list, ind){
    L <- length(dm_list)
    out <- matrix(NA,nrow(dm_list[[1]]),L)
    for (i in 1:L){
      out[,i] <- dm_list[[i]] %*% params[ind[i,1]:ind[i,2]]
    }
    out
  }

  #Get SE/CIs for conditional binomial using delta method
  split_estimate <- function(object, estimate, inds, se.fit){
    out <- estimate
    out@estimates <- coef(object)[inds]
    if(se.fit){
      out@covMat <- vcov(object)[inds,inds,drop=FALSE]
    } else{
      out@covMat <- matrix(NA, nrow=length(inds), ncol=length(inds))
    }
    out
  }

  lc_to_predict <- function(object, estimate, inds, dm, level, se.fit){

    new_est <- split_estimate(object, estimate, inds[1]:inds[2], se.fit)

    out <- t(apply(dm, 1, function(x){
      bt <- backTransform(linearComb(new_est, x))
      if(!se.fit) return(c(Predicted=bt@estimate, SE=NA, lower=NA, upper=NA))
      ci <- confint(bt, level=level)
      names(ci) <- c("lower", "upper")
      c(Predicted=bt@estimate, SE=SE(bt), ci)
    }))
    rownames(out) <- NULL
    as.data.frame(out)
  }


  #Calculate row-wise multinomial logit prob
  #implemented in C++ below as it is quite slow
  get_mlogit_R <- function(lp_mat){
    if(type == 'psi'){
      out <- cbind(1,exp(lp_mat))
      out <- out/rowSums(out)
      out <- out[,-1]
    } else if(type == 'phi'){ #doesn't work
      np <- nrow(lp_mat)
      out <- matrix(NA,np,ncol(lp_mat))
      ins <- outer(1:S, 1:S, function(i,j) i!=j)
      for (i in 1:np){
        phimat <- diag(S)
        phimat[ins] <- exp(lp_mat[i,])
        phimat <- t(phimat)
        phimat <- phimat/rowSums(phimat)
        out[i,] <- phimat[ins]
      }
    } else {
      R <- nrow(lp_mat)
      out <- matrix(NA,R,ncol(lp_mat))
      for (i in 1:R){
        sdp <- matrix(0,nrow=S,ncol=S)
        sdp[guide] <- exp(lp_mat[i,])
        sdp[,1] <- 1
        sdp <- sdp/rowSums(sdp)
        out[i,] <- sdp[guide]
      }
    }
    out
  }

  get_mlogit_C <- function(lp_mat){
    get_mlogit(lp_mat, type, S, guide-1) # via Rcpp
  }

  #----------------------------------------------------------------------------

  if(type=="psi"){
    dm_list <- gd$dm_state
    ind <- gd$state_ind
    est <- object@estimates@estimates$state
  } else if(type=="phi"){
    dm_list <- gd$dm_phi
    ind <- gd$phi_ind
    est <- object@estimates@estimates$transition
  } else {
    dm_list <- gd$dm_det
    ind <- gd$det_ind
    est <- object@estimates@estimates$det
  }

  P <- length(dm_list)

  low_bound <- (1-level)/2
  z <- qnorm(low_bound,lower.tail=F)

  out <- vector("list", P)
  names(out) <- names(dm_list)

  if(object@parameterization == 'condbinom'){
    out <- lapply(1:length(dm_list), function(i){
      lc_to_predict(object, est, ind[i,], dm_list[[i]], level, se.fit)
    })
    names(out) <- names(dm_list)
    return(out)

  } else if (object@parameterization == "multinomial"){
    lp <- get_lp(coef(object), dm_list, ind)
    pred <- get_mlogit_C(lp)

    M <- nrow(pred)
    upr <- lwr <- se <- matrix(NA,M,P)

    if(se.fit){
      message('Bootstrapping confidence intervals with',nsims,'samples')

      sig <- vcov(object)
      param_mean <- coef(object)
      rparam <- mvrnorm(nsims, param_mean, sig)

      get_pr <- function(i){
        lp <- get_lp(rparam[i,], dm_list, ind)
        get_mlogit_C(lp)
      }
      samp <- sapply(1:nsims, get_pr, simplify='array')

      for (i in 1:M){
        for (j in 1:P){
          dat <- samp[i,j,]
          se[i,j] <- sd(dat, na.rm=TRUE)
          quants <- quantile(dat, c(low_bound, (1-low_bound)),na.rm=TRUE)
          lwr[i,j] <- quants[1]
          upr[i,j] <- quants[2]
        }
      }

    }
  }

  for (i in 1:P){
    out[[i]] <- data.frame(Predicted=pred[,i], SE=se[,i],
                           lower=lwr[,i], upper=upr[,i])
  }

  out
})
rbchan/unmarked documentation built on April 3, 2024, 10:11 p.m.