Nothing
#' Transform datasets for factor copula modeling
#'
#' Prepares and organizes datasets for use with the exponential Factor Copula Model (eFCM). The function converts raw station-level observations and their spatial coordinates into an "fdata" object, which contains the data, grid structure, and neighborhood information required for model fitting with `fcm()`.
#'
#' @param data A matrix or data.frame. Each column corresponds to a station, with rows containing observations (on the original scale).
#' @param coord A two-column matrix or data frame of station coordinates (longitude and latitude), one row per station.
#' @param grid Optional two-column matrix or data frame of grid locations (longitude, latitude) at which the model will be fitted. If \code{NULL} (default), a regular grid is generated based on \code{cellsize}.
#' @param neigh Optional list of neighborhood station indices for each grid point. If \code{NULL}, neighborhoods
#' are constructed using [neighborhood_HT()].
#' @param theta0 Optional matrix or data.frame with two columns: initial lambda and delta. Must match number of stations.
#' @param cellsize Numeric vector of length 1 or 2, specifying longitude and latitude resolution.
#' @param parallel Logical; if \code{TRUE}, run neighbourhood selection in parallel using \pkg{pbmcapply}.
#' On Windows, \code{pbmclapply} will fall back to serial execution (progress still shown).
#' @param ncpus Integer; number of worker processes when \code{parallel = TRUE} on Unix-alikes.
#' @param mc.set.seed Logical; seed the RNG streams in workers (default \code{TRUE}).
#' Effective on Unix-alikes; on Windows (serial fallback) it has no effect.
#' @param ... Additional arguments passed to [neighborhood_HT()].
#'
#' @return An object of class `"fdata"`, which is a list with elements:
#' \item{data}{Original input data}
#' \item{coord}{Coordinates of stations}
#' \item{grid}{Grid points with assigned IDs}
#' \item{neigh}{List of neighbor station indices per grid point}
#' \item{theta0}{Initial values matrix}
#' \item{N}{Number of stations}
#'
#' @importFrom progress progress_bar
#' @examples
#' \donttest{
#' # Load precipitation data for counterfactual scenarios
#' data("counterfactual")
#' data("LonLat")
#' coord = LonLat # station coordinates (longitude-latitude)
#' cf_data <- fdata(counterfactual, coord, cellsize = c(1, 1))
#' }
#' @seealso [fcm()], [neighborhood_HT()]
#'
#' @srrstats {G2.0a} *Explicit documentation of expected input lengths*
#' @srrstats {G2.7} *Accepts standard tabular formats*
#' @srrstats {G2.8} *Performs input coercion and validation*
#' @srrstats {G2.9} *Warns about information loss in conversions*
#' @srrstats {G2.10} *Robust to column extraction across tabular types*
#' @srrstats {G5.2a} *All stop/warning messages are unique and testable*
#' @srrstats {G5.8a} *Handles zero-length input edge cases*
#' @srrstats {G5.8b} *Handles unsupported input types*
#'
#' @export
fdata <- function(data, coord, grid = NULL, neigh = NULL, theta0 = NULL, cellsize = c(0.5, 0.5), parallel = TRUE, ncpus = 4, mc.set.seed = TRUE, ...) {
# Input check
assert_tabular(data, "data")
assert_tabular(coord, "coord")
assert_no_missing(coord, "coord")
if (ncol(data) != nrow(coord)) {
stop("Number of stations (columns in `data`) does not match rows in `coord`.", call. = FALSE)
}
if (!is.null(theta0)) {
assert_tabular(theta0, "theta0")
if (nrow(theta0) != ncol(data)) {
stop("Number of rows in `theta0` must match number of stations.", call. = FALSE)
}
if (ncol(theta0) != 2) {
stop("`theta0` must have exactly two columns: lambda and delta.", call. = FALSE)
}
}
# Construct grid
if (is.null(grid)) {
if (length(cellsize) == 1) cellsize <- rep(cellsize, 2)
lon <- seq(min(coord[, 1]), max(coord[, 1]), by = cellsize[1])
lat <- seq(min(coord[, 2]), max(coord[, 2]), by = cellsize[2])
grid <- expand.grid(lon = lon, lat = lat)
}
if (ncol(grid) != 2) {
stop("`grid` must have exactly two columns (longitude, latitude).", call. = FALSE)
}
names(grid) <- c("lon", "lat")
# neighborhood selection
if (is.null(neigh)) {
if (nrow(grid) == 1) {
temp <- tryCatch(
neighborhood_HT(data, coord, unlist(grid[1, ]), ...),
error = function(e) e
)
neigh <- list(if (!inherits(temp, "error")) temp else NULL)
} else {
requireNamespace("pbmcapply", quietly = TRUE)
idx_seq <- seq_len(nrow(grid))
mc_cores <- if (isTRUE(parallel) && .Platform$OS.type != "windows") {
max(1L, as.integer(ncpus))
} else {
1L
}
neigh <- pbmcapply::pbmclapply(
idx_seq,
function(i) {
temp <- tryCatch(
neighborhood_HT(data, coord, unlist(grid[i, ]), ...),
error = function(e) e
)
if (!inherits(temp, "error")) temp else NULL
},
mc.cores = mc_cores,
mc.set.seed = mc.set.seed,
mc.preschedule = FALSE
)
}
message("Neighborhood selection completed.")
}
# Remove grid point without neighbors
emp <- tidy(neigh)
if (length(emp) > 0) {
neigh <- neigh[-emp]
grid <- grid[-emp, ]
}
if (nrow(grid) == 0) {
stop("All grid points were removed due to failed neighborhood selection.", call. = FALSE)
}
# output
grid <- cbind(ID = seq_len(nrow(grid)), grid)
out <- list(
data = data,
coord = coord,
grid = grid,
neigh = neigh,
theta0 = theta0,
N = ncol(data)
)
class(out) <- c("fdata", class(out))
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.