Nothing
#' @title Fit PERMANOVA models and arrange by AICc
#'
#' @description This function fits PERMANOVA models for all combinations of variables in a given dataset, and arranges the models by Akaike Information Criterion (AICc) score. The function also calculates the maximum variance inflation factor (max_vif) for each model.
#'
#' @param all_forms A data frame generated by \code{\link{make_models}}
#' @param veg_data A dataset with vegetation presence absense or abundance data
#' @param env_data A dataset with the variables described in all_froms
#' @param ncores An integer specifying the number of cores to use for parallel processing
#' @param log logical if true, a log file will be generated
#' @param verbose logical, defaults TRUE, sends messages about processing times
#' @param logfile the text file that will be generated as a log
#' @param multiple after how many loops to write a log file
#' @param method method for distance from \code{\link{vegdist}}
#' @param strata a block variable similar to the use in \code{\link{adonis2}}
#'
#' @return A data.frame with fitted models arranged by AICc, including the formula used, the number of
#' explanatory variables, R2, adjusted R2, and the AICc and max VIF.
#'
#'
#' @importFrom parallel makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom foreach foreach %dopar%
#' @importFrom vegan vegdist adonis2
#' @importFrom dplyr bind_rows bind_cols select filter arrange
#' @importFrom broom tidy
#' @importFrom tidyr pivot_longer
#' @importFrom stringr str_replace_all
#' @importFrom stats rnorm lm as.formula complete.cases
#' @examples
#'
#' \donttest{
#' library(vegan)
#' data(dune)
#' data(dune.env)
#'
#' AllModels <- make_models(vars = c("A1", "Moisture", "Manure"))
#'
#' fit_models(all_forms = AllModels,
#' veg_data = dune,
#' env_data = dune.env)
#' }
#'
#' @references
#' Anderson, M. J. (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecology, 26(1), 32-46.
#' https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x
#'
#' @export
fit_models <- function(all_forms,
veg_data,
env_data,
method = "bray",
ncores = 2,
log = TRUE,
logfile = "log.txt",
multiple = 100,
strata = NULL,
verbose = FALSE){
AICc <- R2 <- term <- x <- NULL
if(log){
if(file.exists(logfile)){
file.remove(logfile)
}
}
meta_data <- all_forms
if(!("max_vif" %in% colnames(meta_data))){
meta_data$max_vif <- NA
}
vegetation_data = veg_data
# Check for missing values
missing_rows <- !complete.cases(env_data)
if (any(missing_rows)) {
if(verbose){
# Print message about missing rows and columns
message(sprintf("Removing %d rows with missing values\n", sum(missing_rows)))
message("Columns with missing values: ")
message(names(env_data)[colSums(is.na(env_data)) > 0], sep = ", ")
}
}
# Filter out missing rows
new_env_data <- env_data[complete.cases(env_data), ]
vegetation_data <- vegetation_data[complete.cases(env_data), ]
cl <- parallel::makeCluster(ncores)
doParallel::registerDoParallel(cl)
Distance <- vegan::vegdist(vegetation_data, method = method)
Fs <- foreach(x = 1:nrow(meta_data), .packages = c("vegan", "dplyr", "AICcPermanova", "tidyr", "broom"), .combine = bind_rows, .export = c("Distance")) %dopar% {
Response = new_env_data
Response$y <- rnorm(n = nrow(Response))
gc()
Temp <- meta_data[x,]
if(is.null(strata)){
Model <- try(vegan::adonis2(as.formula(Temp$form[1]), data = Response, by = "margin"))
}
if(!is.null(strata)){
# Convert strata variable to factor
strata_factor <- factor(Response[[strata]])
Model <- try(with(Response, vegan::adonis2(as.formula(Temp$form[1]), data = Response, by = "margin", strata = strata_factor)), silent = TRUE)
}
Temp <- tryCatch(
expr = cbind(Temp, AICcPermanova::AICc_permanova2(Model)),
error = function(e) NA
)
if(is.na(Temp$max_vif)){
Temp$max_vif <- tryCatch(
expr = VIF(lm(as.formula(stringr::str_replace_all(Temp$form[1], "Distance ", "y")), data = Response)),
error = function(e) NA
)
}
Rs <- tryCatch(
{
tidy_model <- broom::tidy(Model)
if (inherits(tidy_model, "try-error")) {
stop("Error occurred in broom::tidy(Model)")
}
tidy_model |>
dplyr::filter(!(term %in% c("Residual", "Total"))) |>
dplyr::select(term, R2) |>
tidyr::pivot_wider(names_from = term, values_from = R2)
},
error = function(e) {
message("Error: ", conditionMessage(e))
NULL
}
)
if(log){
if((x %% multiple) == 0){
sink(logfile, append = TRUE)
cat(paste("finished", x, "number of models", Sys.time(), "of", nrow(meta_data)))
cat("\n")
sink()
}
}
Temp <- bind_cols(Temp, Rs)
Temp
}
parallel::stopCluster(cl)
Fs <- Fs |>
dplyr::arrange(AICc)
return(Fs)
}
#' Get Maximum Variance Inflation Factor (VIF) from a Model
#'
#' This function calculates the maximum Variance Inflation Factor (VIF) for a given model.
#' The VIF is a measure of collinearity among predictor variables within a regression model.
#' It quantifies how much the variance of an estimated regression coefficient is increased due to collinearity.
#' A VIF of 1 indicates no collinearity, while values above 1 indicate increasing levels of collinearity.
#' A VIF of 5 or greater is often considered high, indicating a strong presence of collinearity.
#'
#' @param model A regression model, such as those created by lm, glm, or other similar functions.
#'
#' @return The maximum VIF value.
#'
#' @references
#' - Belsley, D. A., Kuh, E., & Welsch, R. E. (1980). Regression Diagnostics: Identifying Influential Data and Sources of Collinearity. John Wiley & Sons.
#' - Kutner, M. H., Nachtsheim, C. J., Neter, J., & Li, W. (2004). Applied Linear Statistical Models. McGraw-Hill/Irwin.
#' - O'Brien, R. M. (2007). A caution regarding rules of thumb for variance inflation factors. Quality & Quantity, 41(5), 673-690.
#'
#' @importFrom car vif
#'
#' @keywords collinearity
#'
#' @export
VIF <- function(model) {
tryCatch({
vif <- car::vif(model)
max(vif)
}, error = function(e) {
if (grepl("aliased coefficients", e$message)) {
20000
} else if (grepl("model contains fewer than 2 terms", e$message)) {
0
} else {
stop(e)
}
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.