#' @title Fit a PARTS GLS model.
#'
#' @details conduct generalized least-squares regression of
#' spatiotemporal trends
#'
#' @param formula a model formula
#' @param data an optional data frame environment in which to search for
#' variables given by \code{formula}
#' @param V a covariance matrix, which must be positive definitive. This argument
#' is optional if \code{coords}, \code{distm_FUN}, \code{covar_FUN}, and
#' \code{covar.pars} are given instead.
#' @param nugget an optional numeric nugget, must be positive
#' @param formula0 an optional formula for the null model to be compared with
#' \code{formula} by an F-test
#' @param save.xx logical: should information needed for cross-partition
#' comparisons be returned?
#' @param save.invchol logical: should the inverse of the Cholesky matrix be
#' returned?
#' @param logLik.only logical: should calculations stop after calculating parital
#' log-likelihood?
#' @param no.F logical: should F-test calculations be made?
#' @param coords optional coordinate matrix for calculating \code{V} internally
#' @param distm_FUN optional function for calculating a distance matrix from
#' \code{coords}, when calculating \code{V} internally
#' @param covar_FUN optional distance-based covariance function for calculating
#' \code{V} internally
#' @param covar.pars an optional named list of parameters passed to \code{covar_FUN}
#' when calculating \code{V} internally
#' @param invCholV optional pre-calculated inverse cholesky matrix to use in place
#' of \code{V}
#' @param ncores an optional integer indicating how many CPU threads to use for
#' matrix calculations.
#' @param suppress_compare_warning an optional variable to suppress warning that
#' arises from identical \code{formula} and \code{formula0}.
#' @param ... additional arguments passed to \code{optimize_nugget}, which are
#' only used if if \code{nugget = NA}
#'
#' @details \code{fitGLS} fits a GLS model, using terms specified in \code{formula}.
#' In the PARTS method, generally the left side of \code{formula} should be
#' pixel-level trend estimates and the right side should be spatial predictors.
#' The errors of the GLS are correlated according to covariance matrix \code{V}.
#'
#' If \code{nugget = NA}, an ML nugget is estimated from the data using the
#' \code{optimize_nugget()} function. Arguments additional arguments (\code{...})
#' are passed to \code{optimize_nugget} in this case. \code{V} must be provided
#' for nugget optimization.
#'
#' If \code{formula0} is not specified, the default is to fit an intercept-only
#' null model.
#'
#' \code{save.xx} is included to allow for manually conducting a partitioned
#' GLS analyses. Because most users will not need this feature, opting instead
#' to use \code{fitGLS_parition()}, \code{save.xx = FALSE} by default.
#'
#' Similarly, \code{save.invchol} is included to allow for recycling of the
#' inverse cholesky matrix. Often, inverting the large cholesky matrix
#' (i.e., \code{invert_chol(V)}) is the slowest part of GLS. This argument exists
#' to allow users to recycle this process, though no \code{remotePARTS} function
#' currently exists that can use \code{invert_chol(V)} to fit the GLS.
#'
#' \code{logLik.only = TRUE} will return only the partial log-likelihood, which can
#' minimized to obtain the maximum likelihood for a given set of data.
#'
#' If \code{no.F = TRUE}, then the model given by \code{formula} is not compared
#' to the model given by \code{formula0}.
#'
#' If \code{V} is not provided, it can be fit internally by specifying all of
#' \code{coords}, \code{distm_FUN}, \code{covar_FUN}, and \code{covar.pars}.
#' The function given by \code{distm_FUN} will calculate a distance matrix from
#' \code{coords}, which is then transformed into a distance-based covariance
#' matrix with \code{covar_FUN} and parameters given by \code{covar.pars}.
#'
#' This function uses C++ code that uses the Eigen matrix library (RcppEigen
#' package) to fit models as efficiently as possible. As such, all available
#' CPU cores are used for matrix calculations on systems with OpenMP
#' support.
#'
#' \code{ncores} is passed to the C++ code Eigen::setNpThreads() which sets
#' the number of cores used for compatible Eigen matrix operations (when OpenMP
#' is used).
#'
#' @return \code{fitGLS} returns a list object of class "remoteGLS", if
#' \code{logLik.only = FALSE}. The list contains at least the following elements:
#'
#' \describe{
#' \item{coefficients}{coefficient estimates for predictor variables}
#' \item{SSE}{sum of squares error}
#' \item{MSE}{mean squared error}
#' \item{SE}{standard errors}
#' \item{df_t}{degrees of freedom for the t-test}
#' \item{logDetV}{log-determinant of V}
#' \item{tstat}{t-test statistic}
#' \item{pval_t}{p-value of the t-statistic}
#' \item{logLik}{the Log-likelihood of the model}
#' \item{nugget}{the nugget used in fitting}
#' \item{covar_coef}{the covariance matrix of the coefficients}
#' }
#'
#' If \code{no.F = FALSE}, the following elements, corresponding to the null
#' model and F-test are also calculated:
#'
#' \describe{
#' \item{coefficients0}{coefficient estimates for the null model}
#' \item{SSE0}{sum of squares error for the null model}
#' \item{MSE0}{mean squared error for the null model}
#' \item{SE0}{the standard errors for null coefficients}
#' \item{MSR}{the regression mean square}
#' \item{df0}{the null model F-test degrees of freedom}
#' \item{LL0}{the log-likelihood of the null model}
#' \item{df_F}{the F-test degrees of freedom, for the main model}
#' \item{Fstat}{the F-statistic}
#' \item{pval_F}{the F-test p-value}
#' \item{formula}{the alternate formula used}
#' \item{formula0}{the null formula used}
#' }
#'
#' An attribute called also set to \code{"no.F"} is set to the value of
#' argument \code{no.F}, which signals to generic methods how to handle the output.
#'
#' If \code{save.invchol = TRUE}, output also includes
#'
#' \describe{
#' \item{invcholV}{the inverse of the Cholesky decomposition of the covariance
#' matrix obtained with \code{invert_chol(V, nugget)} }
#' }
#'
#' If \code{save.xx = TRUE}, output also includes the following elements
#'
#' \describe{
#' \item{xx}{the predictor variables \code{X}, from the right side of \code{formula},
#' transformed by the inverse cholesky matrix: xx = \code{invcholV \%*\% X} }
#' \item{xx0}{the predictor variables \code{X0}, from the right side of \code{formula0},
#' transformed by the inverse cholesky matrix: xx0 = \code{invcholV \%*\% X0} }
#' }
#'
#' The primary use of \code{xx} and \code{xx0} is for use with \code{fitGLS_partition()}.
#'
#' If \code{logLik.only = TRUE}, a single numeric output containing the
#' log-likelihood is returned.
#'
#' @examples
#'
#' ## read data
#' data(ndvi_AK10000)
#' df = ndvi_AK10000[seq_len(200), ] # first 200 rows
#'
#' ## fit covariance matrix
#' V = covar_exp(distm_scaled(cbind(df$lng, df$lat)), range = .01)
#'
#' ## run GLS
#' (GLS = fitGLS(CLS_coef ~ 0 + land, data = df, V = V))
#'
#' ## with F-test calculations to compare with the NULL model
#' (GLS.F = fitGLS(CLS_coef ~ 0 + land, data = df, V = V, no.F = FALSE))
#'
#' ## find ML nugget
#' fitGLS(CLS_coef ~ 0 + land, data = df, V = V, no.F = FALSE, nugget = NA)
#'
#' ## calculate V internally
#' coords = cbind(df$lng, df$lat)
#' fitGLS(CLS_coef ~ 0 + land, data = df, logLik.only = FALSE, coords = coords,
#' distm_FUN = "distm_scaled", covar_FUN = "covar_exp", covar.pars = list(range = .01))
#'
#' ## use inverse cholesky
#' fitGLS(CLS_coef ~ 0 + land, data = df, invCholV = invert_chol(V))
#'
#' ## save inverse cholesky matrix
#' invchol = fitGLS(CLS_coef ~ 0 + land, data = df, V = V, save.invchol = TRUE)$invcholV
#'
#' ## re-use inverse cholesky instead of V
#' fitGLS(CLS_coef ~ 0 + land, data = df, invCholV = invchol)
#'
#' ## Log-likelihood (fast)
#' fitGLS(CLS_coef ~ 0 + land, data = df, V = V, logLik.only = TRUE)
#'
#' @export
fitGLS <- function(formula, data, V, nugget = 0, formula0 = NULL, save.xx = FALSE,
save.invchol = FALSE, logLik.only = FALSE, no.F = FALSE,
coords, distm_FUN ,covar_FUN, covar.pars, invCholV, ncores = NA,
suppress_compare_warning = FALSE,
...){
if(is.na(ncores)){
ncores = 1
} else {
ncores = as.integer(ncores)
}
# Parse formula arguments to make model matrix
call <- match.call() # function call
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- quote(stats::model.frame) # rename the function call
mf$drop.unused.levels = TRUE
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- as.double(stats::model.response(mf, "numeric"))
if (is.matrix(y)) {stop("response is a matrix: must be a vector (only 1 response)")}
X <- stats::model.matrix(mt, mf, contrasts = NULL)
if (ncol(X) < 1){stop("invalid model matrix: no columns (no intercept or independent variables)")}
rm(mf) # delete the large model frame from memory
# Use invCholV if provided
if (missing(invCholV) || is.null(invCholV)){
# print("no inverse cholesky provided")
invCholV = diag(1) # default matrix to pass to C++
use_invCholV = FALSE
} else {
if (is.na(nugget) & missing(V)){
stop("nugget cannot be optimized with pre-calculated invCholV, use V instead")
}
# print("inverse cholesky WAS provided")
invCholV = invCholV
use_invCholV = TRUE
}
# calculate V, if needed
if (missing(V) & !use_invCholV) {
if (any(missing(coords), missing(distm_FUN), missing(covar_FUN))) {
stop("If V or invCholV are not provided, then coords, distm_FUN, covar_FUN, and covar.pars are needed to calculate V")
}
if (nrow(coords) != nrow(X)){
stop("Rows of coords do not match rows in data")
}
dist.f = match.fun(distm_FUN)
V <- dist.f(coords) # distance
pars = append(list(V), covar.pars) # pars list
covar.f = match.fun(covar_FUN)
V <- do.call(covar.f, pars) # covariance matrix
nv = nrow(V)
} else if (missing(V) & use_invCholV){
V = diag(1) # default matrix to pass to C++
nv <- nrow(invCholV)
} else {nv = nrow(V)}
# Build null model
if (is.null(formula0)){ # formula
formula0 = update(as.formula(formula), . ~ 1)
} else {
formula0 = as.formula(formula0)
}
# check formula objects
## identical formulas, model comparison not useful
if (!suppress_compare_warning & formula0 == formula & no.F == FALSE){
warning("formula and formula0 are identical. Model comparison calculations (F-test) not possible.")
}
## no right-hand side (i.e., formula(x ~ 0) or update(form, . ~ 0))
if(formula[-2]=="~0" | formula0[-2]=="~0" | formula[-2]=="~1 - 1" | formula0[-2]=="~1 - 1"){
stop("invalid formula: the right hand side may not be empty ('~0')")
}
X0 <- if (missing(data) || is.null(data)) { # conditionally assign X0
model.matrix(formula0)
} else {
model.matrix(formula0, data)
}
# Checks
## check positive definitive
if (!all(check_posdef(V))) {
if (logLik.only){
warning("V is not positive definitive")
return(NA) # return NA for logLik function
}
stop("V is not positive definitive")
}
# if (!all(check_posdef(invCholV))){
# stop("invCholV is not positive definitive")
# }
## check for correct dimensions
if (!all.equal(length(y), nrow(X), nrow(X0), nv)) {
stop("Input dimension mismatch")
}
## check that all variables are numeric
if (!all(is.double(y), is.double(X), is.double(V), is.double(invCholV), is.double(X0))) {
stop("All inputs must be numeric (double precision)")
}
# Handle missing nugget
if (is.na(nugget)) {
nugget = optimize_nugget(X = X, y = y, V = V, ...)
}
# Run GLS
GLS <- .Call(`_remotePARTS_fitGLS_cpp`, X, V, y, X0,
nugget, save.xx, save.invchol, logLik.only, no.F,
optimize_nugget = FALSE, nug_l = 0, nug_u = 1, nug_tol = 1e-5,
invCholV = invCholV, use_invCholV = use_invCholV, ncores = ncores)
# return only log-likelihood, if prompted
if(logLik.only){
return(unlist(GLS$logLik))
}
# pvalues
GLS$pval_t <- 2 * pt(abs(GLS$tstat), df = GLS$df_t, lower.tail = F)
if(!no.F){GLS$pval_F <- pf(GLS$Fstat, df1 = GLS$df_F[1], df2 = GLS$df_F[2], lower.tail = F)}
# Update list elements
GLS <- append(list(call = call), GLS)
colnames(GLS$covar_coef) = rownames(GLS$covar_coef) =names(GLS$coefficients) =
names(GLS$SE) = names(GLS$tstat) = names(GLS$pval_t) = colnames(X)
# GLS$predictors = colnames(X)
# GLS$nugget = nugget
GLS$formula = deparse(as.formula(formula))
GLS$formula0 = deparse(as.formula(formula0))
## class and attributes
class(GLS) <- append("remoteGLS", class(GLS))
attr(GLS, "no.F") <- no.F
## Return ----
return(GLS)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.