R/eFCM.R
In eFCM: Exponential Factor Copula Model

Documented in fcm

#' Fit the exponential Factor Copula Model (eFCM)
#'
#' Fits the eFCM at a specified grid point using local neighborhood data.
#'
#' @param s A single integer specifying the grid point index.
#' @param object An object of class \code{"fdata"}, typically created by [fdata()].
#' @param theta0 A numeric vector of initial values for the copula parameters (\eqn{\lambda, \delta}).
#' @param thres A numeric scalar indicating the quantile-based threshold (default is 0.9).
#' @param nu Numeric Matérn smoothness parameter.
#' @param hessian Logical; whether to return the Hessian matrix. Default is TRUE.
#' @param censorL Logical; if TRUE (default), uses the censored likelihood.
#' @param control A list of control options for \code{nlminb()}.
#' @param lower,upper Numeric vectors of parameter bounds for optimization.
#' @param progress Logical; if \code{TRUE} (default), show a progress bar during bootstrap
#'   using \pkg{pbapply}.
#' @param boot Logical; whether to perform bootstrap estimation (default \code{FALSE}).
#' @param R Integer; number of bootstrap replicates if \code{boot = TRUE}.
#' @param sample_prop Numeric in (0,1). Proportion of rows to sample in each replicate
#'   (default 0.9). Ignored if \code{sample_ids} is provided.
#' @param sample_ids Optional list of integer vectors. Each element specifies the row indices
#'   to use for a bootstrap replicate; when supplied, overrides \code{sample_prop}.
#' @param parallel Logical; if \code{TRUE}, run neighbourhood selection in parallel using \pkg{pbmcapply}.
#'   On Windows, \code{pbmclapply} will fall back to serial execution (progress still shown).
#' @param ncpus Integer; number of worker processes when \code{parallel = TRUE} on Unix-alikes.
#' @param mc.set.seed Logical; seed the RNG streams in workers (default \code{TRUE}).
#'   Effective on Unix-alikes; on Windows (serial fallback) it has no effect.
#' @param ... Additional arguments passed to \code{bootstrap_fcm()}.
#'
#'
#' @return An object of class \code{"fcm"}, which is a list including:
#' \item{pars}{Estimated parameters.}
#' \item{hessian}{Hessian matrix (if requested).}
#' \item{nllh}{Negative log-likelihood.}
#' \item{data.u}{Pseudo-uniform transformed data.}
#' \item{gridID}{Location of the selected grid point.}
#' \item{arg}{Model arguments (e.g., \code{thres}, \code{nu}).}
#' \item{neigh}{Neighbourhood indices used for estimation.}
#' \item{coord}{Coordinates of the locations.}
#' \item{data}{Observed data matrix at selected locations.}
#' \item{boot}{(optional) Matrix of bootstrap samples of parameter estimates.}
#'
#' @importFrom boot boot
#' @details
#' The exponential Factor Copula Model (eFCM) assumes that the process \eqn{W(s) = Z(s) + V}, where \eqn{Z(s)} is a zero-mean Gaussian process with correlation \eqn{\rho(h) = \exp(-h/\delta)} and \eqn{V \sim \text{Exp}(\lambda)} is a latent common factor independent of \eqn{Z(s)} and \eqn{s}. This leads to nontrivial tail dependence between spatial locations.
#'
#' @references
#' Castro-Camilo, D. and Huser, R. (2020). Local likelihood estimation of complex tail dependence structures, applied to US precipitation extremes. *JASA*, 115(531), 1037–1054.
#'
#' @seealso \code{\link{fdata}}, \code{\link{coef}}, \code{\link{summary}}
#'
#' @examples
#' \donttest{
#' # Load precipitation data for counterfactual scenarios
#' data("counterfactual")
#' data("LonLat")
#' coord = LonLat  # station coordinates (longitude-latitude)
#'
#' cf_data <- fdata(counterfactual, coord, cellsize = c(1, 1))
#' fit = fcm(s = 1, cf_data, boot = T, R = 1000)
#' }
#'
#' @srrstats {G1.0} *Statistical software should list at least one primary reference from published academic literature.*
#' @srrstats {G1.1} *The package implements a novel statistical method.*
#' @srrstats {G2.1} *The function implements assertions on types of inputs.*
#' @srrstats {G2.3a} *match.arg() or equivalent is used to restrict argument choices when appropriate.*
#' @srrstats {G2.13} *Missing data are checked prior to analysis.*
#' @srrstats {G5.4} *Correctness tests compare outputs against expected values.*
#' @srrstats {G5.6a} *Parameter recovery is tested with simulated data within acceptable tolerance.*
#' @srrstats {G5.7} *Performance is tested as data size or parameter settings vary.*
#'
#' @export
fcm <- function(s, object, theta0 = c(2, 100), thres = 0.9, nu = 0.5, hessian = TRUE,
                control = list(), censorL = TRUE, boot = FALSE, R = 1000, progress = TRUE,
                lower = c(1, 1), upper = Inf, sample_prop = 0.9, sample_ids  =  NULL,
                parallel = FALSE, ncpus = 4, mc.set.seed = TRUE, ...) {
  # Input assertions
  assert_numeric_scalar(s, "s")
  assert_class(object, "fdata", "object")
  assert_length(theta0, 2, "theta0")
  assert_numeric_scalar(thres, "thres"); assert_probability(thres, "thres")
  assert_numeric_scalar(nu, "nu")
  if (!is.logical(hessian) || length(hessian)!=1) stop("`hessian` must be a single logical value.", call. = FALSE)
  if (!is.logical(censorL) || length(censorL)!=1) stop("`censorL` must be a single logical value.", call. = FALSE)
  assert_non_negative(lower, "lower"); assert_no_missing(lower, "lower")
  assert_no_missing(upper, "upper")
  assert_numeric_scalar(R, "R"); R <- as.integer(R)
  if (!is.logical(boot) || length(boot)!=1) stop("`boot` must be a single logical value.", call. = FALSE)
  if (!is.logical(progress) || length(progress)!=1) stop("`progress` must be a single logical value.", call. = FALSE)

  # Prepare bounds
  lower[lower < 0] <- 0
  upper_out <- if (any(is.infinite(upper))) Inf else log(upper)
  lower_out <- log(lower)

  # Initial data transformation
  data_init <- fit_initial(as.matrix(object$coord), as.matrix(object$data), object$neigh[[s]] - 1)
  if (anyNA(data_init$data_u)) {
    warning("Missing values detected in data_u. Removing rows with NA.")
    data_init$data_u <- na.omit(data_init$data_u)
  }

  # Optimize parameters
  out <- optim(
    par     = log(theta0),
    fn      = model_likelihood,
    data_u  = data_init$data_u,
    coord   = as.matrix(data_init$coord),
    thres   = thres,
    nu      = nu,
    censorL = censorL,
    lower   = lower_out,
    upper   = upper_out,
    control = control
  )
  out$par <- exp(out$par)

  # Compute Hessian if requested
  if (isTRUE(hessian)) {
    out$hessian <- numDeriv::hessian(model_likelihood, log(out$par),
                           data_u = data_init$data_u,
                           coord = as.matrix(data_init$coord),
                           thres = thres,
                           nu = nu,
                           censorL = censorL)
  }

  # Populate output
  out$arg   <- list(thres = thres, nu = nu, censorL = censorL)
  out$grid  <- object$grid[s, ]
  out$neigh <- object$neigh[[s]]
  out$coord <- data_init$coord
  out$data.u <- data_init$data_u
  out$data <- as.matrix(object$data[, object$neigh[[s]]])

  # Bootstrap estimation
  if (isTRUE(boot)) {
    out$boot <- bootstrap_fcm(
      R           = R,
      s           = s,
      object      = object,
      theta0      = theta0,
      thres       = thres,
      nu          = nu,
      censorL     = censorL,
      control     = control,
      lower       = lower_out,
      upper       = upper_out,
      progress    = progress,
      sample_prop = sample_prop,
      sample_ids  = sample_ids,
      parallel    = parallel,
      ncpus       = ncpus,
      mc.set.seed = mc.set.seed,
      ...
    )
  }
  class(out) <- c("fcm", class(out))
  return(out)
}