Nothing
#' rSPDE linear mixed effects models
#'
#' Fitting linear mixed effects model with latent Whittle-Matern models.
#'
#' @param formula Formula object describing the relation between the response variables and the fixed effects. If the response variable is a matrix, each column of the matrix will be treated as a replicate.
#' @param loc A vector with the names of the columns in `data` that contain the observation locations, or a `matrix` or a `data.frame` containing the observation locations. If the model is of class `metric_graph`, the locations must be either a `matrix` or a `data.frame` with two columns, or a character vector with the names of the two columns. The first column being the number of the edge, and the second column being the normalized position on the edge. If the model is a 2d model, `loc` must be either a `matrix` or `data.frame` with two columns or a character vector with the name of the two columns that contain the location, the first entry corresponding to the `x` entry and the second corresponding to the `y` entry.
#' @param data A `data.frame` containing the data to be used.
#' @param model Either an object generated by `matern.operators()` or `spde.matern.operators()`. If `NULL`, a simple linear regression will be performed.
#' @param repl Vector indicating the replicate of each observation. If `NULL` it will assume there is only one replicate.
#' @param which_repl Which replicates to use? If `NULL` all replicates will be used.
#' @param optim_method The method to be used with `optim` function.
#' @param use_data_from_graph Logical. Only for models generated from graphs from `metric_graph` class. In this case, should the data, the locations and the replicates be obtained from the graph object?
#' @param starting_values_latent A vector containing the starting values for the latent model. If the latent model was generated by `matern.operators()`, then the starting values should be provided as a vector of the form c(tau,kappa). If the model was generated by `spde.matern.operators()`, then the starting values should be provided as a vector containing the nonstationary parameters.
#' @param start_sigma_e Starting value for the standard deviation of the measurament error.
#' @param start_nu Starting value for the smoothness parameter.
#' @param start_alpha Starting value for the smoothness parameter. Will be used if `start_nu` is not given.
#' @param nu If `NULL`, the smoothness parameter will be estimated, otherwise the smoothness parameter will be kept fixed at the provided value.
#' @param alpha If `NULL`, the smoothness parameter will be estimated, otherwise the smoothness parameter will be kept fixed at the provided value. Will be used if `nu` is not given.
# @param model_matrix logical indicating whether the model matrix should be returned as component of the returned value.
#' @param nu_upper_bound A parameter that limits the maximum value that nu can assume.
#' @param rspde_order The order of the rational approximation to be used while fitting the model. If not given, the order from the model object will be used.
#' @param parallel logical. Indicating whether to use optimParallel or not.
#' @param n_cores Number of cores to be used if parallel is true.
#' @param optim_controls Additional controls to be passed to `optim` or `optimParallel`.
# @param improve_gradient Should a more precise estimate of the gradient be obtained? Turning on might increase the overall time. If `TRUE`, the "Richardson" method will be used. See the help of the `grad` function in `numDeriv` package for details.
# @param gradient_args List of controls to be used for the gradient. The list can contain the arguments to be passed to the `method.args` argument in the `numDeriv::grad` function. See the help of the `grad` function in `numDeriv` package for details.
#' @param improve_hessian Should a more precise estimate of the hessian be obtained? Turning on might increase the overall time.
#' @param hessian_args List of controls to be used if `improve_hessian` is `TRUE`. The list can contain the arguments to be passed to the `method.args` argument in the `numDeriv::hessian` function. See the help of the `hessian` function in `numDeriv` package for details. Observe that it only accepts the "Richardson" method for now, the method "complex" is not supported.
#' @return A list containing the fitted model.
#' @rdname rspde_lme
#' @export
#'
rspde_lme <- function(formula, loc, data,
model = NULL, repl = NULL,
which_repl = NULL,
optim_method = "L-BFGS-B",
use_data_from_graph = TRUE,
starting_values_latent = NULL,
start_sigma_e = NULL,
start_alpha = NULL,
alpha = NULL,
start_nu = NULL,
nu = NULL,
nu_upper_bound = 4,
rspde_order = NULL,
# model_matrix = TRUE,
parallel = FALSE,
n_cores = parallel::detectCores()-1,
optim_controls = list(),
# improve_gradient = FALSE,
# gradient_args = list(),
improve_hessian = FALSE,
hessian_args = list()) {
null_model <- TRUE
if(!is.null(model)){
if(!inherits(model, c("CBrSPDEobj","rSPDEobj"))){
stop("The model should be an object of class 'CBrSPDEobj' or 'rSPDEobj'.")
}
null_model <- FALSE
model <- update(model, parameterization = "spde")
}
estimate_nu <- TRUE
estimated_alpha <- NULL
if(!is.null(nu)){
estimate_nu <- FALSE
if(!is.numeric(nu)){
stop("nu must be numeric!")
}
if(length(nu)>1){
stop("nu must have length 1")
}
if(nu < 0){
stop("nu must be positive.")
}
alpha <- nu + model$d/2
}
if(!is.null(alpha)){
estimate_nu <- FALSE
if(!is.numeric(alpha)){
stop("alpha must be numeric!")
}
if(length(alpha)>1){
stop("alpha must have length 1")
}
if(alpha <= model$d/2){
stop(paste("alpha must be greater than dim/2 = ",model$d/2))
}
nu <- alpha - model$d/2
}
if(!is.null(rspde_order) && !is.null(model)){
model <- update(model, user_m = rspde_order)
} else if (!is.null(model)){
rspde_order <- model$m
} else{
rspde_order <- NULL
}
if (is.null(formula)) {
stop("No formula provided!")
}
call_rspde_lme <- match.call()
likelihood_new <- NULL
new_likelihood <- NULL
if(null_model){
model <- list(has_graph = FALSE,
stationary = FALSE)
}
time_data_start <- Sys.time()
if (missing(data) && (!model$has_graph)) {
data <- environment(formula)
} else if(model$has_graph){
if(use_data_from_graph){
if(is.null(model$graph$.__enclos_env__$private$data)){
stop("The graph has no data! Either add data to the graph, or add the data manually and set 'use_data_from_graph' to FALSE.")
}
data <- model$graph$.__enclos_env__$private$data
repl <- model$graph$.__enclos_env__$private$data[[".group"]]
if(missing(loc)){
# Don't do anything, we will replace loc anyway
}
loc <- cbind(model$graph$.__enclos_env__$private$data[[".edge_number"]],
model$graph$.__enclos_env__$private$data[[".distance_on_edge"]])
}
}
y_term <- stats::terms(formula)[[2]]
y_resp <- eval(y_term, envir = data, enclos = parent.frame())
y_resp <- as.numeric(y_resp)
cov_term <- stats::delete.response(terms(formula))
X_cov <- stats::model.matrix(cov_term, data)
if(all(dim(X_cov) == c(0,1))){
names_temp <- colnames(X_cov)
X_cov <- matrix(1, nrow = length(y_resp))
colnames(X_cov) <- names_temp
}
if(is.null(repl)){
repl <- rep(1, length(y_resp))
}
if(is.null(which_repl)){
which_repl <- unique(repl)
}
which_repl <- unique(which_repl)
if(length(setdiff(which_repl, repl))>0){
warning("There are elements in 'which_repl' that are not in 'repl'.")
}
idx_repl <- (repl %in% which_repl)
y_resp <- y_resp[idx_repl]
if(ncol(X_cov)>0){
X_cov <- X_cov[idx_repl, , drop = FALSE]
} else {
X_cov <- matrix(ncol=0,nrow=0)
}
repl <- repl[idx_repl]
time_data_end <- Sys.time()
time_data <- time_data_end - time_data_start
if(!null_model){
time_build_likelihood_start <- Sys.time()
if(is.null(starting_values_latent)){
if(!model$stationary){
if(is.null(model$theta)){
stop("For models given by spde.matern.operators(), theta must be non-null!")
}
starting_values_latent <- model$theta
} else{
# if(model$parameterization == "spde"){
starting_values_latent <- log(c(model$tau, model$kappa))
# } else if(model$parameterization == "matern"){
# starting_values_latent <- log(c(model$sigma, model$range))
# }
}
} else{
if(model$stationary){
if(length(starting_values_latent)!=2){
stop("starting_values_latent must be a vector of length 2.")
}
if(any(starting_values_latent<0)){
stop("For stationary models, the values of starting_values_latent must be positive.")
}
} else{
if(length(starting_values_latent)!=ncol(model$B.tau)){
stop("starting_values_latent must be a vector of the same length as the number of the covariates for the latent model.")
}
}
}
if(estimate_nu){
if(is.null(start_nu) && is.null(start_alpha)){
start_values <- c(log(c(0.1*sd(y_resp), model$nu)),starting_values_latent)
} else if(!is.null(start_nu)){
if(!is.numeric(start_nu)){
stop("start_nu must be numeric.")
}
if(length(start_nu>1)){
stop("start_nu must have length 1.")
}
if(start_nu <= 0){
stop("start_nu must be positive")
}
start_values <- c(log(c(0.1*sd(y_resp), start_nu)),starting_values_latent)
} else {
if(!is.numeric(start_alpha)){
stop("start_alpha must be numeric.")
}
if(length(start_alpha>1)){
stop("start_alpha must have length 1.")
}
if(start_alpha <= model$d/2){
stop(paste("start_alpha must be greater than dim/2 =", model$d/2))
}
start_values <- c(log(c(0.1*sd(y_resp), start_alpha - model$d/2)),starting_values_latent)
}
} else{
start_values <- c(log(0.1*sd(y_resp)), starting_values_latent)
}
if(!is.null(start_sigma_e)){
start_values[1] <- log(start_sigma_e)
}
if(is.data.frame(loc) || is.matrix(loc)){
loc_df <- loc
} else if(is.character(loc)){
if(!model$has_graph){
dim <- model$d
if(length(loc) != dim){
stop("If 'loc' is a character vector, it must have the same length as the dimension (unless model comes from a metric graph).")
}
if(dim == 1){
loc_df <- matrix(data[[loc[1]]], ncol=1)
} else if (dim == 2){
loc_df <- cbind(as.vector(data[[loc[1]]]),
as.vector(data[[loc[2]]]))
}
} else{
if(length(loc)!=2){
stop("For a metric graph, 'loc' must have length two.")
}
loc_df <- cbind(as.vector(data[[loc[1]]]),
as.vector(data[[loc[2]]]))
}
} else{
stop("loc must be either a matrix, a data.frame or a character vector with the names of the columns of the observation locations.")
}
repl_val <- unique(repl)
A_list <- list()
# y_list <- list()
# X_cov_list <- list()
# has_cov <- FALSE
# if(ncol(X_cov) > 0){
# has_cov <- TRUE
# }
loc_df <- loc_df[idx_repl, ,drop=FALSE]
if(!is.null(model$make_A)) {
for(j in repl_val){
ind_tmp <- (repl %in% j)
y_tmp <- y_resp[ind_tmp]
na_obs <- is.na(y_tmp)
# y_list[[as.character(j)]] <- y_tmp[!na_obs]
A_list[[as.character(j)]] <- model$make_A(loc_df[ind_tmp,,drop = FALSE])
A_list[[as.character(j)]] <- A_list[[as.character(j)]][!na_obs, , drop = FALSE]
# if(has_cov){
# X_cov_list[[as.character(j)]] <- X_cov[ind_tmp, , drop = FALSE]
# X_cov_list[[as.character(j)]] <- X_cov_list[[as.character(j)]][!na_obs, , drop = FALSE]
# }
if(inherits(model, "CBrSPDEobj")){
if(!is.null(alpha)){
if(alpha %% 1 != 0){
A_list[[as.character(j)]] <- kronecker(matrix(1, 1, model$m + 1), A_list[[as.character(j)]])
}
} else{
A_list[[as.character(j)]] <- kronecker(matrix(1, 1, model$m + 1), A_list[[as.character(j)]])
}
}
}
} else{
stop("When creating the model object using matern.operators() or spde.matern.operators(), you should either supply a graph, or a mesh, or mesh_loc (this last one only works for dimension 1).")
}
n_coeff_nonfixed <- length(start_values)
model_tmp <- model
model_tmp$mesh <- NULL
model_tmp$graph <- NULL
model_tmp$make_A <- NULL
if(inherits(model, "CBrSPDEobj")){
likelihood <- function(theta){
sigma_e <- exp(theta[1])
n_cov <- ncol(X_cov)
n_initial <- n_coeff_nonfixed
if(estimate_nu){
nu <- exp(theta[2])
if(nu %% 1 == 0){
nu <- nu - 1e-5
}
nu <- min(nu, 9.99)
gap <- 1
} else{
gap <- 0
}
if(model_tmp$stationary){
# if(model_tmp$parameterization == "spde"){
alpha <- nu + model$d/2
if(estimate_nu){
alpha <- max(1e-5 + model$d/2, alpha)
}
tau <- exp(theta[2+gap])
kappa <- exp(theta[3+gap])
model_tmp <- update.CBrSPDEobj(model_tmp,
user_alpha = alpha, user_tau = tau,
user_kappa = kappa, parameterization = "spde")
# } else if(model_tmp$parameterization == "matern"){
# sigma <- exp(theta[2+gap])
# range <- exp(theta[3+gap])
# model_tmp <- update.CBrSPDEobj(model_tmp,
# user_nu = nu,
# user_sigma = sigma, user_range = range,
# parameterization = "matern")
# }
} else{
theta_model <- theta[(2+gap):(n_initial)]
alpha <- nu + model$d/2
if(estimate_nu){
alpha <- max(1e-5 + model$d/2, alpha)
}
model_tmp <- update.CBrSPDEobj(model_tmp,
user_theta = theta_model,
user_alpha = alpha,
parameterization = "spde")
}
if(n_cov > 0){
beta_cov <- theta[(n_initial+1):(n_initial+n_cov)]
} else{
beta_cov <- NULL
}
loglik <- aux_lme_CBrSPDE.matern.loglike(object = model_tmp, y = y_resp, X_cov = X_cov, repl = repl,
A_list = A_list, sigma_e = sigma_e, beta_cov = beta_cov)
return(-loglik)
}
} else{
likelihood <- function(theta){
sigma_e <- exp(theta[1])
n_cov <- ncol(X_cov)
n_initial <- n_coeff_nonfixed
if(estimate_nu){
nu <- exp(theta[2])
if(nu %% 1 == 0){
nu <- nu - 1e-5
}
nu <- min(nu, nu_upper_bound)
gap <- 1
} else{
gap <- 0
}
if(model$stationary){
# if(model_tmp$parameterization == "spde"){
alpha <- nu + model$d/2
if(estimate_nu){
alpha <- max(1e-5 + model$d/2, alpha)
}
tau <- exp(theta[2+gap])
kappa <- exp(theta[3+gap])
model_tmp <- update.rSPDEobj(model_tmp,
user_alpha = alpha, user_tau = tau,
user_kappa = kappa, parameterization = "spde")
# } else if(model_tmp$parameterization == "matern"){
# sigma <- exp(theta[2+gap])
# range <- exp(theta[3+gap])
# model_tmp <- update.rSPDEobj(model_tmp,
# user_nu = nu,
# user_sigma = sigma, user_range = range,
# parameterization = "matern")
# }
} else{
theta_model <- theta[(2+gap):(n_initial)]
alpha <- nu + model$d/2
if(estimate_nu){
alpha <- max(1e-5 + model$d/2, alpha)
}
model_tmp <- update.rSPDEobj(model_tmp,
user_theta = theta_model,
user_alpha = alpha, parameterization = "spde")
}
if(n_cov > 0){
beta_cov <- theta[(n_initial+1):(n_initial+n_cov)]
} else{
beta_cov <- NULL
}
loglik <- aux_lme_rSPDE.matern.loglike(object = model_tmp, y = y_resp, X_cov = X_cov, repl = repl,
A_list = A_list, sigma_e = sigma_e, beta_cov = beta_cov)
return(-loglik)
}
}
if(ncol(X_cov)>0 && !is.null(model)){
names_tmp <- colnames(X_cov)
data_tmp <- cbind(y_resp, X_cov)
data_tmp <- na.omit(data_tmp)
temp_coeff <- lm(data_tmp[,1] ~ data_tmp[,-1] - 1)$coeff
names(temp_coeff) <- names_tmp
start_values <- c(start_values, temp_coeff)
rm(data_tmp)
}
time_build_likelihood_end <- Sys.time()
time_build_likelihood <- time_build_likelihood_end - time_build_likelihood_start
hessian <- TRUE
if(improve_hessian){
hessian <- FALSE
}
time_par <- NULL
likelihood_new <- function(theta){
l_tmp <- tryCatch(likelihood(theta),
error = function(e){return(NULL)})
if(is.null(l_tmp)){
return(10^100)
}
return(l_tmp)
}
if(parallel){
start_par <- Sys.time()
n_cores_lim <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "")
if (nzchar(n_cores_lim) && n_cores_lim == "TRUE") {
n_cores <- 2L
}
cl <- parallel::makeCluster(n_cores)
parallel::setDefaultCluster(cl = cl)
parallel::clusterExport(cl, "y_resp", envir = environment())
parallel::clusterExport(cl, "model_tmp", envir = environment())
parallel::clusterExport(cl, "A_list", envir = environment())
parallel::clusterExport(cl, "X_cov", envir = environment())
# parallel::clusterExport(cl, "y_list", envir = environment())
parallel::clusterExport(cl, "aux_lme_CBrSPDE.matern.loglike",
envir = as.environment(asNamespace("rSPDE")))
parallel::clusterExport(cl, "aux_lme_rSPDE.matern.loglike",
envir = as.environment(asNamespace("rSPDE")))
end_par <- Sys.time()
time_par <- end_par - start_par
start_fit <- Sys.time()
res <- optimParallel::optimParallel(start_values,
likelihood_new, method = optim_method,
control = optim_controls,
hessian = hessian,
parallel = list(forward = FALSE, cl = cl,
loginfo = FALSE))
end_fit <- Sys.time()
time_fit <- end_fit-start_fit
parallel::stopCluster(cl)
time_hessian <- NULL
if(!is.na(res[1])){
if(!improve_hessian){
observed_fisher <- res$hessian
} else{
if(!is.list(hessian_args)){
stop("hessian_controls must be a list")
}
start_hessian <- Sys.time()
observed_fisher <- numDeriv::hessian(likelihood_new, res$par,
method.args = hessian_args)
end_hessian <- Sys.time()
time_hessian <- end_hessian-start_hessian
}
eig_hes <- eigen(observed_fisher)$value
cond_pos_hes <- (min(eig_hes) > 1e-15)
} else{
stop("Could not fit the model. Please, try another method with 'parallel' set to FALSE.")
}
if(min(eig_hes) < 1e-15){
warning("The optimization failed to provide a numerically positive-definite Hessian. You can try to obtain a positive-definite Hessian by setting 'improve_hessian' to TRUE or by setting 'parallel' to FALSE, which allows other optimization methods to be used.")
}
} else{
possible_methods <- c("Nelder-Mead", "L-BFGS-B", "BFGS", "CG")
start_fit <- Sys.time()
res <- withCallingHandlers(tryCatch(optim(start_values,
likelihood_new, method = optim_method,
control = optim_controls,
hessian = hessian), error = function(e){return(NA)}),
warning = function(w){invokeRestart("muffleWarning")})
end_fit <- Sys.time()
time_fit <- end_fit-start_fit
cond_pos_hes <- FALSE
time_hessian <- NULL
if(!is.na(res[1])){
if(!improve_hessian){
observed_fisher <- res$hessian
} else{
if(!is.list(hessian_args)){
stop("hessian_controls must be a list")
}
start_hessian <- Sys.time()
observed_fisher <- numDeriv::hessian(likelihood_new, res$par,
method.args = hessian_args)
end_hessian <- Sys.time()
time_hessian <- end_hessian-start_hessian
}
eig_hes <- eigen(observed_fisher)$value
cond_pos_hes <- (min(eig_hes) > 1e-15)
}
problem_optim <- list()
if(is.na(res[1]) || !cond_pos_hes){
problem_optim[[optim_method]] <- list()
if(is.na(res[1])){
problem_optim[[optim_method]][["lik"]] <- NA
problem_optim[[optim_method]][["res"]] <- res
problem_optim[[optim_method]][["hess"]] <- NA
problem_optim[[optim_method]][["time_hessian"]] <- NA
problem_optim[[optim_method]][["time_fit"]] <- NA
} else{
problem_optim[[optim_method]][["lik"]] <- -res$value
problem_optim[[optim_method]][["res"]] <- res
problem_optim[[optim_method]][["hess"]] <- observed_fisher
problem_optim[[optim_method]][["time_hessian"]] <- time_hessian
problem_optim[[optim_method]][["time_fit"]] <- time_fit
}
}
ok_optim <- FALSE
orig_optim_method <- optim_method
test_other_optim <- (is.na(res[1]) || !cond_pos_hes)
if(test_other_optim){
tmp_method <- optim_method
while(length(possible_methods)>1){
possible_methods <- setdiff(possible_methods, tmp_method)
new_method <- possible_methods[1]
time_fit <- NULL
start_fit <- Sys.time()
res <- withCallingHandlers(tryCatch(optim(start_values,
likelihood_new, method = new_method,
control = optim_controls,
hessian = hessian), error = function(e){return(NA)}),
warning = function(w){invokeRestart("muffleWarning")})
end_fit <- Sys.time()
time_fit <- end_fit-start_fit
tmp_method <- new_method
cond_pos_hes <- FALSE
if(is.na(res[1])){
problem_optim[[tmp_method]][["lik"]] <- NA
problem_optim[[tmp_method]][["res"]] <- res
problem_optim[[tmp_method]][["hess"]] <- NA
problem_optim[[tmp_method]][["time_hessian"]] <- NA
problem_optim[[tmp_method]][["time_fit"]] <- NA
} else{
if(!improve_hessian){
observed_fisher <- res$hessian
} else{
if(!is.list(hessian_args)){
stop("hessian_controls must be a list")
}
start_hessian <- Sys.time()
observed_fisher <- numDeriv::hessian(likelihood_new, res$par,
method.args = hessian_args)
end_hessian <- Sys.time()
time_hessian <- end_hessian-start_hessian
}
eig_hes <- eigen(observed_fisher)$value
cond_pos_hes <- (min(eig_hes) > 1e-15)
problem_optim[[tmp_method]][["lik"]] <- -res$value
problem_optim[[tmp_method]][["res"]] <- res
problem_optim[[tmp_method]][["hess"]] <- observed_fisher
problem_optim[[tmp_method]][["time_hessian"]] <- time_hessian
problem_optim[[tmp_method]][["time_fit"]] <- time_fit
}
cond_ok <- ((!is.na(res[1])) && cond_pos_hes)
if(cond_ok){
optim_method <- new_method
ok_optim <- TRUE
break
}
}
}
if(test_other_optim){
lik_val <- lapply(problem_optim, function(dat){dat[["lik"]]})
if(all(is.na(lik_val))){
stop("The model could not be fitted. All optimizations method failed.")
} else if(ok_optim){
warning(paste("optim method",orig_optim_method,"failed to provide a positive-definite Hessian. Another optimization method was used."))
rm(problem_optim)
} else{
max_method <- which.max(lik_val)
res <- problem_optim[[max_method]][["res"]]
observed_fisher <- problem_optim[[max_method]][["hess"]]
time_hessian <- problem_optim[[max_method]][["time_hessian"]]
time_fit <- problem_optim[[max_method]][["time_fit"]]
warning("All optimization methods failed to provide a numerically positive-definite Hessian. The optimization method with largest likelihood was chosen. You can try to obtain a positive-definite Hessian by setting 'improve_hessian' to TRUE.")
}
}
}
if(model$stationary){
coeff <- exp(c(res$par[1:n_coeff_nonfixed]))
if(estimate_nu){
estimated_alpha <- coeff[2] + model$d/2
}
} else{
coeff <- res$par[1:n_coeff_nonfixed]
coeff[1] <- exp(coeff[1])
if(estimate_nu){
coeff[2] <- exp(coeff[2])
estimated_alpha <- coeff[2] + model$d/2
}
}
coeff <- c(coeff, res$par[-c(1:n_coeff_nonfixed)])
loglik <- -res$value
n_fixed <- ncol(X_cov)
n_random <- length(coeff) - n_fixed - 1
if(model$stationary){
par_change <- diag(c(exp(-c(res$par[1:n_coeff_nonfixed])), rep(1,n_fixed)))
observed_fisher <- par_change %*% observed_fisher %*% par_change
}
inv_fisher <- tryCatch(solve(observed_fisher), error = function(e) matrix(NA, nrow(observed_fisher), ncol(observed_fisher)))
std_err <- sqrt(diag(inv_fisher))
coeff_random <- coeff[2:(n_coeff_nonfixed)]
std_random <- std_err[2:(n_coeff_nonfixed)]
if(model$stationary){
# if(model$parameterization == "spde"){
par_names <- c("tau", "kappa")
# } else if(model$parameterization == "matern"){
# par_names <- c("sigma", "range")
# }
} else{
par_names <- c("Theta 1")
if(ncol(model$B.tau)>2){
for(i in 2:(ncol(model$B.tau)-1)){
par_names <- c(par_names, paste("Theta",i))
}
}
}
if(estimate_nu){
par_names <- c("nu", par_names)
}
names(coeff_random) <- par_names
coeff_meas <- coeff[1]
names(coeff_meas) <- "std. dev"
std_meas <- std_err[1]
coeff_fixed <- NULL
if(n_fixed > 0){
coeff_fixed <- coeff[(2+n_random):length(coeff)]
std_fixed <- std_err[(2+n_random):length(coeff)]
} else{
std_fixed <- NULL
}
new_likelihood <- NULL
if(model$stationary){
time_matern_par_start <- Sys.time()
new_likelihood <- function(theta){
new_par <- res$par
if(estimate_nu){
new_par[3:4] <- theta
} else{
new_par[2:3] <- theta
}
return(likelihood(new_par))
}
if(estimate_nu){
coeff_random_nonnu <- coeff_random[-1]
new_observed_fisher <- observed_fisher[3:4,3:4]
} else{
coeff_random_nonnu <- coeff_random
new_observed_fisher <- observed_fisher[2:3,2:3]
}
if(estimate_nu){
change_par <- change_parameterization_lme(new_likelihood, model$d, coeff_random[1], coeff_random_nonnu,
hessian = new_observed_fisher #,
# improve_gradient = improve_gradient,
# gradient_args = gradient_args
)
} else{
change_par <- change_parameterization_lme(new_likelihood, model$d, nu, coeff_random_nonnu,
hessian = new_observed_fisher #,
# improve_gradient = improve_gradient,
# gradient_args = gradient_args
)
}
matern_coeff <- list()
matern_coeff$random_effects <- coeff_random
if(estimate_nu){
names(matern_coeff$random_effects) <- c("nu", "sigma", "range")
matern_coeff$random_effects[2:3] <- change_par$coeff
} else{
matern_coeff$random_effects <- change_par$coeff
names(matern_coeff$random_effects) <- c("sigma", "range")
}
matern_coeff$std_random <- std_random
if(estimate_nu){
matern_coeff$std_random[2:3] <- change_par$std_random
} else{
matern_coeff$std_random <- change_par$std_random
}
time_matern_par_end <- Sys.time()
time_matern_par <- time_matern_par_end - time_matern_par_start
} else{
matern_coeff <- NULL
time_matern_par <- NULL
}
} else{ # If model is NULL
coeff_random <- NULL
time_matern_par <- NULL
std_random <- NULL
time_build_likelihood <- NULL
start_values <- NULL
matern_coeff <- NULL
time_fit <- NULL
time_hessian <- NULL
time_par <- NULL
A_list <- NULL
if(ncol(X_cov) == 0){
stop("The model does not have either random nor fixed effects.")
}
names_tmp <- colnames(X_cov)
data_tmp <- cbind(y_resp, X_cov)
data_tmp <- na.omit(data_tmp)
res <- lm(data_tmp[,1] ~ data_tmp[,-1] - 1)
coeff_fixed <- res$coeff
names(coeff_fixed) <- names_tmp
sm_temp <- summary(res)
std_fixed <- sm_temp$coefficients
rownames(std_fixed) <- names_tmp
coeff_meas <- sm_temp$sigma
names(coeff_meas) <- "std. dev"
std_meas <- NULL
loglik <- logLik(res)[[1]]
}
if(is.null(coeff_fixed) && is.null(coeff_random)){
stop("The model does not have either random nor fixed effects.")
}
object <- list()
object$coeff <- list(measurement_error = coeff_meas,
fixed_effects = coeff_fixed, random_effects = coeff_random)
object$estimate_nu <- estimate_nu
if(object$estimate_nu && !null_model){
names(object$coeff$random_effects)[1] <- "alpha"
object$coeff$random_effects[1] <- object$coeff$random_effects[1] + model$d/2
}
object$std_errors <- list(std_meas = std_meas,
std_fixed = std_fixed, std_random = std_random)
object$call <- call_rspde_lme
object$terms <- list(fixed_effects = X_cov)
object$response_data <- list(y = y_resp)
object$formula <- formula
object$matern_coeff <- matern_coeff
object$estimation_method <- optim_method
object$parameterization_latent <- model$parameterization
object$repl <- repl
object$idx_repl <- idx_repl
object$optim_controls <- optim_controls
object$latent_model <- model
object$nobs <- sum(idx_repl)
object$null_model <- null_model
object$start_values <- start_values
object$loglik <- loglik
object$niter <- res$counts
object$response_var <- y_term
object$covariates <- cov_term
object$fitting_time <- time_fit
object$rspde_order <- rspde_order
object$time_matern_par <- time_matern_par
object$improve_hessian <- improve_hessian
object$time_hessian <- time_hessian
object$parallel <- parallel
object$time_par <- time_par
object$time_data <- time_data
object$optim_method <- optim_method
object$time_likelihood <- time_build_likelihood
object$A_list <- A_list
object$has_graph <- model$has_graph
object$which_repl <- which_repl
object$stationary <- model$stationary
object$nu <- nu
object$alpha <- alpha
object$estimated_alpha <- estimated_alpha
object$df.residual <- object$nobs -(1 + length(object$coeff$fixed_effects) + length(object$coeff$random_effects))
object$lik_fun <- likelihood_new
object$par_lik_fun <- new_likelihood
object$mle_par_orig <- res$par
# object$lik_fun <- likelihood
# object$start_val <- start_values
# if(model_matrix){
if(ncol(X_cov)>0){
object$model_matrix <- cbind(y_resp, X_cov)
} else{
object$model_matrix <- y_resp
}
# }
class(object) <- "rspde_lme"
return(object)
}
#' @name print.rspde_lme
#' @title Print Method for \code{rspde_lme} Objects
#' @description Provides a brief description of results related to mixed effects with Whittle-Matern latent models.
#' @param x object of class "rspde_lme" containing results from the fitted model.
#' @param ... further arguments passed to or from other methods.
#' @return Called for its side effects.
#' @noRd
#' @method print rspde_lme
#' @export
print.rspde_lme <- function(x, ...) {
#
if(!is.null(x$latent_model)){
if(x$latent_model$stationary){
call_name <- "Latent model - Whittle-Matern"
} else{
call_name <- "Latent model - Generalized Whittle-Matern"
}
} else{
call_name <- "Linear regression model"
}
coeff_fixed <- x$coeff$fixed_effects
coeff_random <- x$coeff$random_effects
cat("\n")
cat(call_name)
if(!x$estimate_nu && !is.null(x$latent_model)){
cat(" with fixed smoothness")
}
cat("\n\n")
cat("Call:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
cat(paste0("Fixed effects:", "\n"))
if(!is.null(coeff_fixed)){
print(coeff_fixed)
} else{
message("No fixed effects")
}
cat("\n")
if(!x$estimate_nu){
cat("Smoothness parameter:\n")
smooth <- c(x$alpha, x$nu)
names(smooth) <- c("alpha", "nu")
print(smooth)
cat("\n")
}
cat(paste0("Random effects:", "\n"))
if(!is.null(coeff_random)){
print(coeff_random)
if(x$stationary){
cat(paste0("\n", "Random effects (Matern parameterization):", "\n"))
print(x$matern_coeff$random_effects)
}
} else{
message("No random effects")
}
cat("\n")
cat(paste0("Measurement error:", "\n"))
print(x$coeff$measurement_error)
}
#' @name summary.rspde_lme
#' @title Summary Method for \code{rspde_lme} Objects.
#' @description Function providing a summary of results related to mixed effects regression models with Whittle-Matern latent models.
#' @param object an object of class "rspde_lme" containing results from the fitted model.
#' @param all_times Show all computed times.
#' @param ... not used.
#' @return An object of class \code{summary_rspde_lme} containing several
#' informations of a *rspde_lme* object.
#' @method summary rspde_lme
#' @export
summary.rspde_lme <- function(object, all_times = FALSE,...) {
ans <- list()
nfixed <- length(object$coeff$fixed_effects)
nrandom <- length(object$coeff$random_effects)
model_type <- !object$null_model
if(model_type){
if(object$latent_model$stationary){
call_name <- "Latent model - Whittle-Matern"
} else{
call_name <- "Latent model - Generalized Whittle-Matern"
}
} else{
call_name <- "Linear regression model"
}
coeff_fixed <- object$coeff$fixed_effects
coeff_random <- object$coeff$random_effects#
coeff_meas <- object$coeff$measurement_error
SEr_fixed <- object$std_errors$std_fixed
SEr_random <- object$std_errors$std_random
SEr_meas <- object$std_errors$std_meas
if(object$stationary){
coeff <- c(coeff_fixed, coeff_random, object$matern_coeff$random_effects, coeff_meas)
SEr <- c(SEr_fixed,SEr_random, object$matern_coeff$std_random, SEr_meas)
} else{
coeff <- c(coeff_fixed, coeff_random, coeff_meas)
SEr <- c(SEr_fixed,SEr_random, SEr_meas)
}
if(model_type){
tab <- cbind(coeff, SEr, coeff / SEr, 2 * stats::pnorm(-abs(coeff / SEr)))
colnames(tab) <- c("Estimate", "Std.error", "z-value", "Pr(>|z|)")
rownames(tab) <- names(coeff)
if(object$stationary){
tab <- list(fixed_effects = tab[seq.int(length.out = nfixed), , drop = FALSE], random_effects = tab[seq.int(length.out = nrandom) + nfixed, , drop = FALSE],
random_effects_matern = tab[seq.int(length.out = nrandom) + nrandom + nfixed, , drop = FALSE],
meas_error = tab[seq.int(length.out = 1) + nfixed+2*nrandom, , drop = FALSE])
} else{
tab <- list(fixed_effects = tab[seq.int(length.out = nfixed), , drop = FALSE], random_effects = tab[seq.int(length.out = nrandom) + nfixed, , drop = FALSE],
meas_error = tab[seq.int(length.out = 1) + nfixed+nrandom, , drop = FALSE])
}
} else{
tab <- list(fixed_effects = SEr_fixed, coeff_meas = coeff_meas)
}
ans$coefficients <- tab
ans$model_type <- model_type
ans$call_name <- call_name
ans$call <- object$call
ans$loglik <- object$loglik
ans$optim_method <- object$optim_method
ans$niter <- object$niter
ans$estimate_nu <- object$estimate_nu
ans$nu <- object[["nu"]]
ans$alpha <- object$alpha
ans$all_times <- all_times
ans$fitting_time <- object$fitting_time
ans$improve_hessian <- object$improve_hessian
ans$time_hessian <- object$time_hessian
ans$parallel <- object$parallel
ans$time_par <- object$time_par
ans$time_data <- object$time_data
ans$time_matern_par <- object$time_matern_par
ans$time_likelihood <- object$time_likelihood
class(ans) <- "summary_rspde_lme"
ans
}
#' @name print.summary_rspde_lme
#' @title Print Method for \code{summary_rspde_lme} Objects
#' @description Provides a brief description of results related to mixed effects regression models with Whittle-Matern latent models.
#' @param x object of class "summary_rspde_lme" containing results of summary method applied to a fitted model.
#' @param ... further arguments passed to or from other methods.
#' @return Called for its side effects.
#' @noRd
#' @method print summary_rspde_lme
#' @export
print.summary_rspde_lme <- function(x,...) {
tab <- x$coefficients
#
digits <- max(3, getOption("digits") - 3)
#
call_name <- x$call_name
cat("\n")
cat(call_name)
if(!x$estimate_nu){
cat(" with fixed smoothness")
}
cat("\n\n")
cat("Call:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n", sep = "")
#
model_type <- tolower(x$model_type)
#
if(model_type){
if (NROW(tab$fixed_effects)) {
cat(paste0("\nFixed effects:\n"))
stats::printCoefmat(tab[["fixed_effects"]], digits = digits, signif.legend = FALSE)
} else {
message("\nNo fixed effects. \n")
}
#
if(!x$estimate_nu){
cat("\nSmoothness parameter: \n")
cat("\t alpha =",x$alpha,"\n")
cat("\t nu =", x$nu, "(Matern parameterization)\n")
}
if (NROW(tab$random_effects)) {
cat(paste0("\nRandom effects:\n"))
stats::printCoefmat(tab[["random_effects"]][,1:3], digits = digits, signif.legend = FALSE)
} else {
cat(paste0("\nRandom effects:\n"))
message("No random effects. \n")
}
if (NROW(tab$random_effects_matern)) {
cat(paste0("\nRandom effects (Matern parameterization):\n"))
stats::printCoefmat(tab[["random_effects_matern"]][,1:3], digits = digits, signif.legend = FALSE)
}
#
cat(paste0("\nMeasurement error:\n"))
stats::printCoefmat(tab[["meas_error"]][1,1:3,drop = FALSE], digits = digits, signif.legend = FALSE)
} else{
cat(paste0("\nFixed effects:\n"))
stats::printCoefmat(tab[["fixed_effects"]], digits = digits, signif.legend = FALSE)
cat(paste0("\nRandom effects:\n"))
message("No random effects. \n")
cat(paste0("\nMeasurement error:\n"))
print(tab$coeff_meas)
}
#
if (getOption("show.signif.stars")) {
cat("---\nSignif. codes: ", "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1", "\n\n")
}
#
cat("Log-Likelihood: ", x$loglik,"\n")
if(model_type){
cat(paste0("Number of function calls by 'optim' = ", x$niter[1],"\n"))
cat(paste0("Optimization method used in 'optim' = ", x$optim_method,"\n"))
cat(paste0("\nTime used to:"))
if(x$all_times){
cat("\t prepare the data = ", paste(trunc(x$time_data[[1]] * 10^5)/10^5,attr(x$time_data, "units"),"\n"))
cat("\t build the likelihood = ", paste(trunc(x$time_likelihood[[1]] * 10^5)/10^5,attr(x$time_likelihood, "units"),"\n"))
cat("\t compute Matern parameterization = ", paste(trunc(x$time_matern_par[[1]] * 10^5)/10^5,attr(x$time_likelihood, "units"),"\n"))
}
cat("\t fit the model = ", paste(trunc(x$fitting_time[[1]] * 10^5)/10^5,attr(x$fitting_time, "units"),"\n"))
if(x$improve_hessian){
cat(paste0("\t compute the Hessian = ", paste(trunc(x$time_hessian[[1]] * 10^5)/10^5,attr(x$time_hessian, "units"),"\n")))
}
if(x$parallel){
cat(paste0("\t set up the parallelization = ", paste(trunc(x$time_par[[1]] * 10^5)/10^5,attr(x$time_par, "units"),"\n")))
}
}
}
#' @name predict.rspde_lme
#' @title Prediction of a mixed effects regression model on a metric graph.
#' @param object The fitted object with the `rspde_lme()` function
#' @param newdata A `data.frame` or a `list` containing the covariates, the edge number and the distance on edge
#' for the locations to obtain the prediction.
#' @param loc Prediction locations. Can either be a `data.frame`, a `matrix` or a character vector, that contains the names of the columns of the coordinates of the locations. For models using `metric_graph` objects, plase use `edge_number` and `distance_on_edge` instead.
#' @param mesh Obtain predictions for mesh nodes? The graph must have a mesh, and either `only_latent` is set to TRUE or the model does not have covariates.
# @param repl Which replicates to obtain the prediction. If `NULL` predictions will be obtained for all replicates. Default is `NULL`.
#' @param which_repl Which replicates to use? If `NULL` all replicates will be used.
#' @param compute_variances Set to also TRUE to compute the kriging variances.
#' @param posterior_samples If `TRUE`, posterior samples will be returned.
#' @param n_samples Number of samples to be returned. Will only be used if `sampling` is `TRUE`.
#' @param edge_number Name of the variable that contains the edge number, the default is `edge_number`.
#' @param distance_on_edge Name of the variable that contains the distance on edge, the default is `distance_on_edge`.
#' @param normalized Are the distances on edges normalized?
#' @param sample_latent Do posterior samples only for the random effects?
#' @param return_as_list Should the means of the predictions and the posterior samples be returned as a list, with each replicate being an element?
#' @param return_original_order Should the results be return in the original (input) order or in the order inside the graph?
#' @param ... Not used.
#' @param data `r lifecycle::badge("deprecated")` Use `newdata` instead.
#' @return A list with elements `mean`, which contains the means of the
#' predictions, `fe_mean`, which is the prediction for the fixed effects, `re_mean`, which is the prediction for the random effects, `variance` (if `compute_variance` is `TRUE`), which contains the
#' variances of the predictions, `samples` (if `posterior_samples` is `TRUE`),
#' which contains the posterior samples.
#' @export
#' @method predict rspde_lme
predict.rspde_lme <- function(object, newdata = NULL, loc = NULL, mesh = FALSE, which_repl = NULL, compute_variances = FALSE, posterior_samples = FALSE,
n_samples = 100, sample_latent = FALSE, edge_number = "edge_number",
distance_on_edge = "distance_on_edge", normalized = FALSE, return_as_list = FALSE, return_original_order = TRUE,
...,
data = deprecated()) {
if (lifecycle::is_present(data)) {
if (is.null(newdata)) {
lifecycle::deprecate_warn("2.3.3", "predict(data)", "predict(newdata)",
details = c("`data` was provided but not `newdata`. Setting `newdata <- data`.")
)
newdata <- data
} else {
lifecycle::deprecate_warn("2.3.3", "predict(data)", "predict(newdata)",
details = c("Both `newdata` and `data` were provided. Only `newdata` will be considered.")
)
}
data <- NULL
}
data <- newdata
if(is.null(data)){
if(!mesh){
stop("If 'mesh' is false, you should supply data!")
}
}
out <- list()
coeff_fixed <- object$coeff$fixed_effects
coeff_random <- object$coeff$random_effects
coeff_meas <- object$coeff$measurement_error
if(inherits(object, "graph_lme")){
if(object$estimate_nu){
coeff_random[1] <- coeff_random[1] - 0.5
}
}
if(object$has_graph){
loc <- cbind(data[[edge_number]], data[[distance_on_edge]])
} else if(is.character(loc)){
loc <- data[loc]
}
loc <- as.matrix(loc)
X_cov_initial <- as.matrix(object$model_matrix)[,-1]
if(ncol(X_cov_initial) > 0){
if(mesh){
stop("In the presence of covariates, you should provide the data, including the covariates at the prediction locations. If you only want predictions for the latent model, set 'only_latent' to TRUE.")
}
}
X_cov_pred <- stats::model.matrix(object$covariates, data)
if(nrow(X_cov_pred) != nrow(as.matrix(loc))){
stop("Covariates not found in data.")
}
if(sum(duplicated(loc)) > 0){
warning("There are duplicated locations for prediction, we will try to process the data to extract the unique locations,
along with the corresponding covariates.")
if(nrow(X_cov_pred) == nrow(loc)){
data_tmp <- cbind(loc, X_cov_pred)
}
data_tmp <- unique(data_tmp)
if(sum(duplicated(cbind(data_tmp[,1:ncol(loc)]))) > 0){
stop("Data processing failed, please provide a data with unique locations.")
}
}
if(!mesh){
n_prd <- length(loc[,1])
# Convert data to normalized
if(object$has_graph && !normalized){
loc[,2] <- loc[,2] / object$graph$edge_lengths[loc[,1]]
}
Aprd <- object$latent_model$make_A(loc)
} else{
Aprd <- Matrix::Diagonal(nrow(object$latent_model$C))
}
##
repl_vec <- object$repl
if(is.null(which_repl)){
u_repl <- unique(repl_vec)
} else{
u_repl <- unique(which_repl)
}
##
if(all(dim(X_cov_pred) == c(0,1))){
X_cov_pred <- matrix(1, nrow = nrow(loc), ncol=1)
}
if(ncol(X_cov_pred) > 0){
mu_prd <- X_cov_pred %*% coeff_fixed
} else{
mu_prd <- matrix(0, nrow = nrow(loc), ncol=1)
}
model_matrix_fit <- object$model_matrix
model_matrix_fit <- as.matrix(model_matrix_fit)
if(ncol(model_matrix_fit)>1){
X_cov_fit <- model_matrix_fit[,2:ncol(model_matrix_fit)]
mu <- X_cov_fit %*% coeff_fixed
} else{
mu <- 0
}
Y <- model_matrix_fit[,1] - mu
model_type <- object$latent_model
sigma.e <- coeff_meas[[1]]
sigma_e <- sigma.e
## construct Q
if(object$estimate_nu){
alpha_est <- coeff_random[1]
tau_est <- coeff_random[2]
kappa_est <- coeff_random[3]
} else{
tau_est <- coeff_random[1]
kappa_est <- coeff_random[2]
alpha_est <- NULL
}
new_rspde_obj <- update(object$latent_model,
user_alpha = alpha_est,
user_kappa = kappa_est,
user_tau = tau_est,
parameterization = "spde")
Q <- new_rspde_obj$Q
idx_obs_full <- as.vector(!is.na(Y))
Aprd <- kronecker(matrix(1, 1, object$rspde_order + 1), Aprd)
for(repl_y in u_repl){
idx_repl <- repl_vec == repl_y
idx_obs <- idx_obs_full[idx_repl]
y_repl <- Y[idx_repl]
y_repl <- y_repl[idx_obs]
A_repl <- object$A_list[[repl_y]]
Q_xgiveny <- t(A_repl) %*% A_repl/sigma_e^2 + Q
mu_krig <- solve(Q_xgiveny,as.vector(t(A_repl) %*% y_repl / sigma_e^2))
# mu_krig <- mu_krig[(gap+1):length(mu_krig)]
mu_krig <- Aprd %*% mu_krig
mu_re <- mu_krig
mu_fe <- mu_prd
mu_krig <- mu_prd + mu_krig
mean_tmp <- as.vector(mu_krig)
mean_fe_tmp <- as.vector(mu_fe)
mean_re_tmp <- as.vector(mu_re)
if(!return_as_list){
out$mean <- c(out$mean, mean_tmp)
out$repl <- c(out$repl, rep(repl_y,n_prd))
out$fe_mean <- c(out$fe_mean, mean_fe_tmp)
out$re_mean <- c(out$re_mean, mean_re_tmp)
} else{
out$mean[[repl_y]] <- mean_tmp
out$fe_mean[[repl_y]] <- mean_fe_tmp
out$re_mean[[repl_y]] <- mean_re_tmp
}
if (compute_variances) {
post_cov <- Aprd%*%solve(Q_xgiveny, t(Aprd))
var_tmp <- pmax(diag(post_cov),0)
if(!return_as_list){
out$variance <- rep(var_tmp, length(u_repl))
} else {
for(repl_y in u_repl){
out$variance[[repl_y]] <- var_tmp
}
}
}
if(posterior_samples){
mean_tmp <- as.vector(mu_krig)
post_cov <- Aprd%*%solve(Q_xgiveny, t(Aprd))
Z <- rnorm(dim(post_cov)[1] * n_samples)
dim(Z) <- c(dim(post_cov)[1], n_samples)
LQ <- tryCatch(chol(post_cov), error = function(e){chol(post_cov + 1e-8*Matrix::Diagonal(nrow(post_cov)))})
X <- LQ %*% Z
X <- X + mean_tmp
if(!sample_latent){
X <- X + matrix(rnorm(n_samples * length(mean_tmp), sd = sigma.e), nrow = length(mean_tmp))
} else{
X <- X - as.vector(mu_prd)
}
if(!return_as_list){
out$samples <- rbind(out$samples, X)
} else{
out$samples[[repl_y]] <- X
}
}
}
return(out)
}
#' @name logLik.rspde_lme
#' @title Log-likelihood for \code{rspde_lme} objects
#' @description computes the log-likelihood for a fitted mixed effects model with `rspde_lme`.
#' @param x Object of class `rspde_lme` containing results from the fitted model.
#' @param ... Currently not used.
#' @return Log-likelihood value.
#' @noRd
#' @method logLik rspde_lme
#' @export
logLik.rspde_lme <- function(object, ...){
ll <- object$loglik
attr(ll, "df") <- 1 + length(object$coeff$fixed_effects) + length(object$coeff$random_effects)
return(ll)
}
#' @name nobs.rspde_lme
#' @title Number of observations for \code{rspde_lme} objects
#' @description Gets the number of observations of the fitted object.
#' @param x Object of class `rspde_lme` containing results from the fitted model.
#' @param ... Currently not used.
#' @return The number of observations.
#' @noRd
#' @method nobs rspde_lme
#' @export
nobs.rspde_lme <- function(object, ...){
return(object$nobs)
}
#' @name deviance.rspde_lme
#' @title Deviance for \code{rspde_lme} objects
#' @description Gets the deviance for `rspde_lme` fitted object.
#' @param x Object of class `rspde_lme` containing results from the fitted model.
#' @param ... Currently not used.
#' @return Deviance
#' @noRd
#' @method deviance rspde_lme
#' @export
deviance.rspde_lme <- function(object, ...){
if(length(object$coeff$random_effects) > 0){
return(-2*object$loglik)
} else{
df_temp <- as.data.frame(object$model_matrix)
colnames(df_temp)[1] <- as.character(object$response_var)
fit_lm <- stats::lm(object$formula, data = df_temp)
return(stats::deviance(fit_lm))
}
}
#' @name glance.rspde_lme
#' @title Glance at an \code{rspde_lme} object
#' @aliases glance glance.rspde_lme
#' @description Glance accepts a \code{rspde_lme} object and returns a
#' [tidyr::tibble()] with exactly one row of model summaries.
#' The summaries are the square root of the estimated variance of the measurement error, residual
#' degrees of freedom, AIC, BIC, log-likelihood,
#' the type of latent model used in the fit and the total number of observations.
#' @param x An \code{rspde_lme} object.
#' @param ... Currently not used.
#' @return A [tidyr::tibble()] with exactly one row and columns:
#' \itemize{
#' \item `nobs` Number of observations used.
#' \item `sigma` the square root of the estimated residual variance
#' \item `logLik` The log-likelihood of the model.
#' \item `AIC` Akaike's Information Criterion for the model.
#' \item `BIC` Bayesian Information Criterion for the model.
#' \item `deviance` Deviance of the model.
#' \item `df.residual` Residual degrees of freedom.
#' \item `model.type` Type of latent model fitted.
#' }
#' @seealso [augment.rspde_lme]
#' @method glance rspde_lme
#' @export
glance.rspde_lme <- function(x, ...){
alpha <- NULL
if(!is.null(x$alpha)){
alpha <- x$alpha
} else {
alpha <- x$estimated_alpha
}
tidyr::tibble(nobs = stats::nobs(x),
sigma = as.numeric(x$coeff$measurement_error[[1]]),
logLik = as.numeric(stats::logLik(x)), AIC = stats::AIC(x),
BIC = stats::BIC(x), deviance = stats::deviance(x),
df.residual = stats::df.residual(x),
model = x$latent_model$type,
alpha = alpha,
cov_function = x$latent_model$cov_function_name)
}
#' @name augment.rspde_lme
#' @title Augment data with information from a `rspde_lme` object
#' @aliases augment augment.rspde_lme
#' @description Augment accepts a model object and a dataset and adds information about each observation in the dataset. It includes
#' predicted values in the `.fitted` column, residuals in the `.resid` column, and standard errors for the fitted values in a `.se.fit` column.
#' It also contains the New columns always begin with a . prefix to avoid overwriting columns in the original dataset.
#' @param x A `rspde_lme` object.
#' @param newdata A `data.frame` or a `list` containing the covariates, the edge
#' number and the distance on edge for the locations to obtain the prediction. If `NULL`, the fitted values will be given for the original locations where the model was fitted.
#' @param loc Prediction locations. Can either be a `data.frame`, a `matrix` or a character vector, that contains the names of the columns of the coordinates of the locations. For models using `metric_graph` objects, plase use `edge_number` and `distance_on_edge` instead.
#' @param mesh Obtain predictions for mesh nodes? The graph must have a mesh, and either `only_latent` is set to TRUE or the model does not have covariates.
# @param repl Which replicates to obtain the prediction. If `NULL` predictions will be obtained for all replicates. Default is `NULL`.
#' @param which_repl Which replicates to obtain the prediction. If `NULL` predictions
#' will be obtained for all replicates. Default is `NULL`.
#' @param se_fit Logical indicating whether or not a .se.fit column should be added to the augmented output. If TRUE, it only returns a non-NA value if type of prediction is 'link'.
#' @param conf_int Logical indicating whether or not confidence intervals for the fitted variable should be built.
#' @param pred_int Logical indicating whether or not prediction intervals for future observations should be built.
#' @param level Level of confidence and prediction intervals if they are constructed.
#' @param n_samples Number of samples when computing prediction intervals.
#' @param edge_number Name of the variable that contains the edge number, the default is `edge_number`.
#' @param distance_on_edge Name of the variable that contains the distance on edge, the default is `distance_on_edge`.
#' @param normalized Are the distances on edges normalized?
#' @param ... Additional arguments.
#'
#' @return A [tidyr::tibble()] with columns:
#' \itemize{
#' \item `.fitted` Fitted or predicted value.
#' \item `.fittedlwrconf` Lower bound of the confidence interval, if conf_int = TRUE
#' \item `.fitteduprconf` Upper bound of the confidence interval, if conf_int = TRUE
#' \item `.fittedlwrpred` Lower bound of the prediction interval, if pred_int = TRUE
#' \item `.fitteduprpred` Upper bound of the prediction interval, if pred_int = TRUE
#' \item `.fixed` Prediction of the fixed effects.
#' \item `.random` Prediction of the random effects.
#' \item `.resid` The ordinary residuals, that is, the difference between observed and fitted values.
#' \item `.se_fit` Standard errors of fitted values, if se_fit = TRUE.
#' }
#'
#' @seealso [glance.rspde_lme]
#' @method augment rspde_lme
#' @export
augment.rspde_lme <- function(x, newdata = NULL, loc = NULL, mesh = FALSE, which_repl = NULL, se_fit = FALSE, conf_int = FALSE, pred_int = FALSE, level = 0.95, n_samples = 100, edge_number = "edge_number",
distance_on_edge = "distance_on_edge", normalized = FALSE,...) {
.resid <- FALSE
if(is.null(newdata)){
.resid <- TRUE
}
level <- level[[1]]
if(!is.numeric(level)){
stop("'level' must be numeric!")
}
if(level > 1 || level < 0){
stop("'level' must be between 0 and 1!")
}
if(pred_int){
pred <- stats::predict(x, newdata = newdata, loc = loc, mesh = mesh, which_repl = which_repl, compute_variances = TRUE,
posterior_samples = TRUE, n_samples = n_samples, edge_number = edge_number,
distance_on_edge = distance_on_edge, normalized = normalized, return_original_order = FALSE, return_as_list = FALSE)
} else if(conf_int || se_fit){
pred <- stats::predict(x, newdata = newdata, loc = loc, mesh = mesh, which_repl = which_repl, compute_variances = TRUE,
posterior_samples = FALSE, edge_number = ".edge_number",
distance_on_edge = ".distance_on_edge", normalized = TRUE, return_original_order = FALSE, return_as_list = FALSE)
} else{
pred <- stats::predict(x, newdata = newdata, loc = loc, mesh = mesh, which_repl = which_repl, compute_variances = FALSE, posterior_samples = FALSE, edge_number = ".edge_number",
distance_on_edge = ".distance_on_edge", normalized = TRUE, return_original_order = FALSE, return_as_list = FALSE)
}
if(se_fit || pred_int || conf_int){
newdata[[".fitted"]] <- pred$mean
newdata[[".se_fit"]] <- sqrt(pred$variance)
newdata[[".fixed"]] <- pred$fe_mean
newdata[[".random"]] <- pred$re_mean
if(.resid){
newdata[[".resid"]] <- pred$mean - newdata[[as.character(x$response_var)]]
}
} else{
newdata[[".fitted"]] <- pred$mean
newdata[[".fixed"]] <- pred$fe_mean
newdata[[".random"]] <- pred$re_mean
if(.resid){
newdata[[".resid"]] <- pred$mean - newdata[[as.character(x$response_var)]]
}
}
if(conf_int){
newdata$.fittedlwrconf <- newdata[[".fitted"]] + stats::qnorm( (1-level)/2 )*newdata[[".se_fit"]]
newdata$.fitteduprconf <- newdata[[".fitted"]] + stats::qnorm( (1+level)/2 )*newdata[[".se_fit"]]
}
if(pred_int){
list_pred_int <- lapply(1:nrow(pred$samples), function(i){
y_sim <- pred$samples[i,]
y_sim <- sort(y_sim)
idx_lwr <- max(1, round(n_samples * (1 - level) / 2))
idx_upr <- round(n_samples * (1 + level) / 2)
c(y_sim[idx_lwr], y_sim[idx_upr])})
list_pred_int <- unlist(list_pred_int)
list_pred_int <- t(matrix(list_pred_int, nrow = 2))
newdata$.fittedlwrpred <- list_pred_int[,1]
newdata$.fitteduprpred <- list_pred_int[,2]
}
newdata
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.