#' Variance propagation for density surface models
#'
#' Calculate the uncertainty in predictions from a fitted DSM, including
#' uncertainty from the detection function.
#'
#' When we make predictions from a spatial model, we also want to know the
#' uncertainty about that abundance estimate. Since density surface models are
#' 2 (or more) stage models, we need to incorporate the uncertainty from the
#' earlier stages (i.e. the detection function) into our "final" uncertainty
#' estimate.
#'
#' This function will refit the spatial model but include the Hessian of the
#' offset as an extra term. Variance estimates using this new model can then be
#' used to calculate the variance of predicted abundance estimates which
#' incorporate detection function uncertainty. Importantly, this requires
#' that any covariates in the detection function do not vary within a
#' segment (so, for example, individual-level covariates like sex cannot be
#' used).
#'
#' For more information on how to construct the prediction grid `data.frame`,
#' `newdata`, see [`predict.dsm`][predict.dsm].
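#'
#' As a rough sketch (column names and values here are illustrative and must
#' match the covariates in your model), a prediction grid with cell areas as
#' the offset might be built like:
#'
#' ```
#' preddata <- expand.grid(x = seq(0, 10000, by = 1000),
#'                         y = seq(0, 10000, by = 1000))
#' # off.set gives the area of each prediction cell
#' preddata$off.set <- 1000 * 1000
#' ```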
#'
#' This routine is only useful if a detection function with covariates has been
#' used in the DSM.
#'
#' Note that we can use `var.type="Vc"` here (see `gamObject`), which is the
#' variance-covariance matrix for the spatial model, corrected for smoothing
#' parameter uncertainty. See Wood, Pya & Säfken (2016) for more
#' information.
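#'
#' For example (a sketch; `mod1` is a fitted `dsm` and `preddata` a
#' prediction grid), the corrected matrix is requested via:
#'
#' ```
#' vp_c <- dsm_varprop(mod1, preddata, var.type = "Vc")
#' ```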
#'
#' Models with fixed scale parameters (e.g., negative binomial) do not require
#' an extra round of optimisation.
#'
#' @section Diagnostics:
#' The summary output from the function includes a simple diagnostic that
#' shows the average probability of detection from the "original" fitted
#' model (the model supplied to this function; column `Fitted.model`) and
#' the probability of detection from the refitted model (used for variance
#' propagation; column `Refitted.model`), along with the standard error of
#' the probability of detection from the fitted model (`Fitted.model.se`).
#' These are evaluated at the unique values of any factor covariates used in
#' the detection function (for continuous covariates, the 5%, 50% and 95%
#' quantiles are shown). If there are large differences between the
#' probabilities of detection then there are potentially problems with the
#' fitted model, the variance propagation, or both. This can happen when the
#' fitted model does not account for enough of the variability in the data,
#' so that on refitting, the variance model absorbs it in the random effect.
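#'
#' As a sketch of checking this diagnostic (object names are illustrative):
#'
#' ```
#' vp <- dsm_varprop(mod1, preddata)
#' # compare the Fitted.model and Refitted.model columns; differences much
#' # larger than Fitted.model.se warrant investigation
#' summary(vp)
#' ```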
#'
#' @return a `list` with elements:
#' * `old_model` fitted model supplied to the function as `model`
#' * `refit` refitted model object, with extra term
#' * `pred` point estimates of predictions at `newdata`
#' * `var` total variance calculated over all of `newdata`
#' * `ses` standard error for each prediction cell in `newdata`
#'
#' If `newdata=NULL` then the last three entries are `NA`.
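#'
#' As a quick sketch of using these elements (assuming `vp` is the result of
#' a `dsm_varprop` call with non-`NULL` `newdata`):
#'
#' ```
#' Nhat <- sum(vp$pred)                   # total abundance over the grid
#' cv <- sqrt(as.numeric(vp$var)) / Nhat  # coefficient of variation
#' ```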
#' @author David L. Miller, based on code from Mark V. Bravington and Sharon L.
#' Hedley.
#' @references
#' Bravington, M. V., Miller, D. L., & Hedley, S. L. (2021). Variance
#' Propagation for Density Surface Models. Journal of Agricultural, Biological
#' and Environmental Statistics. https://doi.org/10.1007/s13253-021-00438-2
#'
#' Williams, R., Hedley, S.L., Branch, T.A., Bravington, M.V., Zerbini, A.N.
#' and Findlay, K.P. (2011). Chilean Blue Whales as a Case Study to Illustrate
#' Methods to Estimate Abundance and Evaluate Conservation Status of Rare
#' Species. Conservation Biology 25(3), 526-535.
#'
#' Wood, S.N., Pya, N. and Säfken, B. (2016) Smoothing parameter and model
#' selection for general smooth models. Journal of the American Statistical
#' Association, 1-45.
#'
#' @param model a fitted \code{\link{dsm}}.
#' @param newdata the prediction grid. Set to `NULL` to avoid making
#' predictions and just return model objects.
#' @param trace for debugging; print information on the progress of the
#' scale parameter estimation.
#' @param var.type which variance-covariance matrix should be used (`"Vp"` for
#' variance-covariance conditional on smoothing parameter(s), `"Vc"` for
#' unconditional). See [`gamObject`][gamObject] for details. If
#' in doubt, stick with the default, `"Vp"`.
#' @param var_type deprecated, use `var.type` instead.
#' @export
#' @importFrom utils relist
#'
# @examples
# \dontrun{
# library(Distance)
# library(dsm)
#
# # load the Gulf of Mexico dolphin data (see ?mexdolphins)
# data(mexdolphins)
#
# # fit a detection function
# df <- ds(distdata, truncation=6000,
#          key = "hn", adjustment = NULL)
#
# # fit a simple smooth of x and y
# mod1 <- dsm(count~s(x, y), df, segdata, obsdata, family=tw())
#
# # Calculate the variance
# preddata$off.set <- preddata$area
# mod1.varp <- dsm_varprop(mod1, preddata)
# summary(mod1.varp)
# # this will give a summary over the whole area in mexdolphins$preddata
# }
dsm_varprop <- function(model, newdata=NULL, trace=FALSE, var.type="Vp",
                        var_type=NULL){
  # stop on use of the deprecated argument
  this_call <- match.call(expand.dots = FALSE)
  if("var_type" %in% names(this_call)){
    stop("Argument var_type is deprecated, use var.type instead.")
  }
# die if the link isn't log
if(model$family$link != "log"){
stop("log link must be used!")
}
# check for valid var.type
if(!(var.type %in% c("Vp","Vc"))){
stop("var.type must be \"Vp\" or \"Vc\"")
}
# extract the link & invlink
linkfn <- model$family$linkfun
linkinvfn <- model$family$linkinv
# die if we're not using REML
if(model$method != "REML"){
stop("REML must be used for smoothing parameter selection")
}
# extract the call
this_call <- as.list(model$call)
# remove the function
this_call[1] <- NULL
# extract the detection function(s)
ddf <- model$ddf
  if(!inherits(ddf, "list")){
ddf <- list(ddf)
# work around for dsms fitted in previous versions
if(is.null(model$data$ddfobj)){
model$data$ddfobj <- 1
}
}
# get detection function info
parskel <- list()
for(i in seq_along(ddf)){
parskel[[i]] <- ddf[[i]]$par
}
npars <- lapply(parskel, length)
# new function to differentiate
mu_fn <- function(par, linkfn, ddf, data, ds_newdata, skel){
    # put the parameter flesh back on the list skeleton (see ?relist)
fleshy <- relist(par, skel)
ret <- rep(0, nrow(data))
# loop over the detection functions we have
for(i in seq_along(ddf)){
this_ddf <- ddf[[i]]
# if we don't have a real detection function
if("fake_ddf" %in% class(this_ddf)){
next
}
# set the detection function parameters to be par
this_ddf <- set_ddf_par(fleshy[[i]], this_ddf)
# for io we need new data from both observers
if(this_ddf$method == "io"){
ds_newdata[[i]] <- rbind(ds_newdata[[i]], ds_newdata[[i]])
ds_newdata[[i]]$observer <- c(rep(1, nrow(ds_newdata[[i]])/2),
rep(2, nrow(ds_newdata[[i]])/2))
# note that predict.io will return a vector of the correct
# length
}
# calculate probability of detection
this_p <- predict(this_ddf, newdata=ds_newdata[[i]],
compute=TRUE)$fitted
      # expand back to full length, restoring the duplicated rows
this_p <- this_p[attr(ds_newdata[[i]], "index"), drop=FALSE]
# what is the width?
if(this_ddf$method == "io"){
this_width <- this_ddf$ds$meta.data$width
}else{
this_width <- this_ddf$meta.data$width
}
# calculate offset
if(this_ddf$meta.data$point){
# calculate log effective circle area
# nb. predict() returns effective area of detection for points
ret[data$ddfobj==i] <- linkfn(this_p * pi * this_width^2 *
data$Effort[data$ddfobj==i])
}else{
# calculate log effective strip width
ret[data$ddfobj==i] <- linkfn(2 * this_width * this_p *
data$Effort[data$ddfobj==i])
}
}
return(ret)
}
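  # (so, for a line transect segment j with effort l_j and truncation w,
  # mu_fn returns the log effective area offset
  #   log(2 * w * p_j(theta) * l_j)
  # as a function of the detection function parameters theta)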
# now form the data structures we need to calculate the derivatives
# using the function above...
ds_newdata <- list()
u_ds_newdata <- list()
for(i in seq_along(ddf)){
# get this detection function
this_ddf <- ddf[[i]]
# get all the covariates in this model
df_vars <- all_df_vars(this_ddf)
# if we don't have a real detection function
if("fake_ddf" %in% class(this_ddf)){
ds_newdata[[i]] <- NA
u_ds_newdata[[i]] <- NA
next
}
# if we don't have covariates
# then just setup the data for predict.ds to be a distance of 0,
# which will be ignored anyway
if(length(df_vars)==0){
ds_newdata[[i]] <- data.frame(distance=rep(0, sum(model$data$ddfobj==i)))
}else{
# otherwise need the covars that are in the data (that we saved
# with keepData=TRUE :))
ds_newdata[[i]] <- model$data[model$data$ddfobj==i, ]
ds_newdata[[i]] <- ds_newdata[[i]][ , df_vars, drop=FALSE]
ds_newdata[[i]]$distance <- 0
}
if(this_ddf$method == "io"){
ds_newdata[[i]]$observer <- 1
}
    # there's probably a lot of duplication in the above, so just pass the
    # unique combinations; mu_fn expands back to the right length using the
    # "index" attribute set by uniquecombs
u_ds_newdata[[i]] <- mgcv::uniquecombs(ds_newdata[[i]])
}
# find the derivatives of log(mu)
pars <- unlist(parskel)
firstD <- numderiv(mu_fn, pars, linkfn=linkfn, ddf=ddf,
data=model$data, ds_newdata=u_ds_newdata, skel=parskel)
if(!is.matrix(firstD)){
firstD <- matrix(firstD, ncol=length(pars))
}
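  # firstD is now an (n segments) x (n detection function parameters)
  # matrix, row j holding d log(mu_j)/d theta evaluated at the fitted
  # detection function parameters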
  # put the derivatives into the data as a matrix column named "XX" (a name
  # chosen so we don't clobber some other variable in the data)
  dat <- model$data
  dat[["XX"]] <- firstD
## build the model
# update the formula to include the new term
this_call$formula <- update.formula(this_call$formula,
paste0(paste(as.character(this_call$formula)[c(2,1,3)],
collapse=" "), " + XX"))
# update data
this_call$data <- dat
# now form the paraPen bit
hess <- matrix(0, length(pars), length(pars))
ii <- 1
# loop over our detection functions, extracting the hessian
for(i in seq_along(ddf)){
this_ddf <- ddf[[i]]
if("fake_ddf" %in% class(this_ddf)){
next
}
# extract the hessian
this_hess <- get_hessian(this_ddf)
# drop that matrix into the big hessian
hess[ii:(ii+nrow(this_hess)-1), ii:(ii+ncol(this_hess)-1)] <- this_hess
ii <- ii+nrow(this_hess)
}
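  # supplying the Hessian via paraPen penalises the XX coefficients by the
  # curvature of the detection function log-likelihood, so they act like a
  # random effect encoding the detection function parameter uncertainty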
this_call$paraPen <- c(this_call$paraPen, list(XX=list(hess)))
# tell gam.fixed.priors what to look for
this_call$fixed.priors <- "XX"
# set the trace on the scale parameter estimation
this_call$scale.trace <- trace
# is the scale estimated for this model?
this_call$scale.estimated <- model$scale.estimated
## refit the model
refit <- do.call("gam.fixed.priors", this_call)
refit$data <- dat
refit$ddf <- ddf
class(refit) <- c("dsm", class(refit))
# if no newdata was supplied warn and return
if(is.null(newdata)){
warning("No newdata provided, no predictions returned.")
ret <- list(old_model = model,
refit = refit,
pred = NA,
var = NA,
ses = NA)
class(ret) <- "dsm_varprop"
return(ret)
}
# make some zeros for the paraPen term so they have no mean effect
newdata[["XX"]] <- matrix(0, nrow(newdata), ncol(firstD))
# get the standard errors
# if we are using Vc we need to set unconditional=TRUE
if(var.type=="Vc"){
uncond <- TRUE
}else{
uncond <- FALSE
}
  # also use this to find the NAs (e.g., for soap film smooths, where points
  # outside the boundary need to be removed)
preds <- predict(refit, newdata=newdata, type="response", se.fit=TRUE,
unconditional=uncond)
ses <- preds$se.fit
nas <- is.na(preds$fit)
  # get everyone's favourite matrix, the linear predictor (Lp) matrix
Lp <- predict(refit, newdata=newdata, type="lpmatrix")[!nas, ]
# predictions on the link scale
pred <- Lp %*% coef(refit)
pred <- newdata$off.set[!nas] * linkinvfn(pred)
# get variance-covariance matrix
vc <- refit[[var.type]]
# this is why we can only use log link
dNdbeta <- t(pred)%*%Lp
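  # (with the log link, mu = off.set * exp(X beta), so dmu/dbeta = mu * X;
  # summing over prediction cells gives dN/dbeta = t(pred) %*% Lp)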
# make a sandwich
var_p <- dNdbeta %*% vc %*% t(dNdbeta)
# what should we return?
ret <- list(old_model = model,
refit = refit,
pred = pred,
var = var_p,
ses = ses)
class(ret) <- "dsm_varprop"
return(ret)
}