Nothing
#' @title Filters out equations with high multicollinearity
#'
#' @description This function takes a dataframe with several models and
#' calculates the maximum Variance Inflation Factor (VIF) for a given
#' model. And either filters out the ones with high collinearity or it
#' flags them accordingly
#'
#' @param all_forms A data frame generated by \code{\link{make_models}}
#' @param env_data A dataset with the variables described in all_froms
#' @param ncores An integer specifying the number of cores to use for parallel processing
#' @param filter logical, if TRUE it filters out the models with a maximum VIF of high or higher, if FALSE it generates a new column called collinearity, wich will
#' @param verbose logical, defaults TRUE, sends messages about processing times
#' @return A data.frame with the models, fitering out the ones with high collinearity or flagginf them.
#' @importFrom parallel makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom dplyr bind_rows filter mutate
#' @importFrom stringr str_replace_all
#' @importFrom stats rnorm lm as.formula complete.cases
#'
#' @examples
#'\donttest{
#' library(vegan)
#' data(dune)
#' data(dune.env)
#' AllModels <- make_models(vars = c("A1", "Moisture", "Manure"))
#'
#' filter_vif(all_forms = AllModels,
#' env_data = dune.env)
#'}
#' @export
filter_vif <- function(all_forms,
env_data,
ncores = 2,
filter = TRUE,
verbose = TRUE){
max_vif <- x <- NULL
meta_data <- all_forms
meta_data$max_vif <- NA
if(!filter){
meta_data$collinearity <- NA
}
# Check for missing values
missing_rows <- !complete.cases(env_data)
if (any(missing_rows)) {
if(verbose){
# Print message about missing rows and columns
message(sprintf("Removing %d rows with missing values\n", sum(missing_rows)))
message("Columns with missing values: ")
message(names(env_data)[colSums(is.na(env_data)) > 0], sep = ", ")
}
}
# Filter out missing rows
new_env_data <- env_data[complete.cases(env_data), ]
cl <- parallel::makeCluster(ncores)
doParallel::registerDoParallel(cl)
Fs <- foreach(x = 1:nrow(meta_data), .packages = c("dplyr", "AICcPermanova", "stringr"), .combine = bind_rows) %dopar% {
Response = new_env_data
Response$y <- rnorm(n = nrow(Response))
gc()
Temp <- meta_data[x,]
Temp$max_vif <- tryCatch(
expr = VIF(lm(as.formula(stringr::str_replace_all(Temp$form[1], "Distance ", "y")), data = Response)),
error = function(e) NA
)
Temp
}
parallel::stopCluster(cl)
if(filter){
Fs <- Fs |>
dplyr::filter(max_vif < 5)
}
if(!filter){
Fs <- Fs |>
dplyr::mutate(collinearity = ifelse(max_vif < 5, "low", "high"))
}
return(Fs)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.