#' Model Summary for Regression Models
#'
#' `r lifecycle::badge("stable")` \cr
#' The function will extract the relevant coefficients from the regression models (see below for supported model).
#'
#' @param model an model object. The following model are tested for accuracy: `lm`, `glm`, `lme`, `lmer`, `glmer`. Other model object may work if it work with parameters::model_parameters()
#' @param digits number of digits to round to
#' @param streamline print streamlined output. Only print model estimate and performance.
#' @param return_result It set to `TRUE`, it return the model estimates data frame.
#' @param assumption_plot Generate an panel of plots that check major assumptions. It is usually recommended to inspect model assumption violation visually. In the background, it calls `performance::check_model()`.
#' @param quite suppress printing output
#' @param standardize The method used for standardizing the parameters. Can be NULL (default; no standardization), "refit" (for re-fitting the model on standardized data) or one of "basic", "posthoc", "smart", "pseudo". See 'Details' in parameters::standardize_parameters()
#' @param ci_method see options in the `Mixed model` section in ?parameters::model_parameters()
#'
#' @references
#' Nakagawa, S., & Schielzeth, H. (2013). A general and simple method for obtaining R2 from generalized linear mixed-effects models. Methods in Ecology and Evolution, 4(2), 133–142. https://doi.org/10.1111/j.2041-210x.2012.00261.x
#'
#' @return a list of model estimate data frame, model performance data frame, and the assumption plot (an `ggplot` object)
#'
#' @export
#' @examples
#' # I am going to show the more generic usage of this function
#' # You can also use this package's built in function to fit the models
#' # I recommend using the integrated_multilevel_model_summary to get everything
#'
#' # lme example
#' lme_fit <- lme4::lmer("popular ~ texp + (1 | class)",
#' data = popular
#' )
#'
#' model_summary(lme_fit)
#'
#' # lm example
#'
#' lm_fit <- lm(Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width,
#' data = iris
#' )
#'
#' model_summary(lm_fit)
model_summary <- function(model,
digits = 3,
assumption_plot = FALSE,
quite = FALSE,
streamline = TRUE,
return_result = FALSE,
standardize = NULL,
ci_method = 'satterthwaite') {
################################################ Linear Mixed Effect Model ################################################
## lme package
if (inherits(model, 'lme')) {
stop('Stop supporting nlme model temporaily. Please use lme4 instead')
model_type <- "Linear Mixed Effect Model (fitted using nlme)"
predict_var <- as.character(attributes(model$terms)$variables)
DV <- predict_var[2]
IV <- predict_var[c(3:length(predict_var))]
IV <- paste0(IV, collapse = ", ")
family <- NULL
# Assumptions check
convergence_check <- FALSE
normality_check <- FALSE
outlier_check <- FALSE
autocorrelation_check <- FALSE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- TRUE
if (all(unlist(lapply(
c("lavaSearch2"), requireNamespace
)))){
lme_param <- tryCatch(parameters::model_parameters(model,ci_method = ci_method))
} else{
"Please install.packages('lavaSearch2') to use nlme"
}
model_summary_df <- lme_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI") %>%
dplyr::select(-'p', dplyr::everything(), "p")
## lmer package
} else if (inherits(model, 'lmerMod')) {
model_type <-
"Linear Mixed Effect Model (fitted using lme4 or lmerTest)"
formula_attribute <-
stats::terms(insight::find_formula(model)$conditional)
DV <- as.character(attributes(formula_attribute)$variables)[2]
IV <- attributes(formula_attribute)$term.labels
IV <- IV[!stringr::str_detect(IV, "\\+")]
IV <- paste0(IV, collapse = ", ")
family <- NULL
# Assumptions check
convergence_check <- TRUE
normality_check <- TRUE
outlier_check <- TRUE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- TRUE
lme_param <-
parameters::model_parameters(model, standardize = standardize,ci_method = ci_method)
model_summary_df <- lme_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI") %>%
dplyr::select(-'p', dplyr::everything(), "p")
################################################ Generalized Linear Mixed Effect Model ################################################
} else if (inherits(model, 'glmerMod')) {
model_type <- "Generalized Linear Mixed Effect Model"
formula_attribute <- stats::terms(model@call$formula)
DV <- as.character(attributes(formula_attribute)$variables)[2]
IV <- attributes(formula_attribute)$term.labels
IV <- IV[!stringr::str_detect(IV, "\\+")]
IV <- paste0(IV, collapse = ", ")
family <- model@call$family
# Assumptions check
convergence_check <- TRUE
normality_check <- FALSE
outlier_check <- FALSE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- TRUE
glme_param <- parameters::parameters(model)
model_summary_df <- glme_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI") %>%
dplyr::select(-'p', dplyr::everything(), "p")
################################################ Generalized Linear Regression ################################################
} else if (inherits(model, "glm")) {
model_type <- "Generazlied Linear regression"
predict_var <- as.character(attributes(model$terms)$predvars)
DV <- predict_var[2]
IV <- predict_var[c(3:length(predict_var))]
IV <- paste0(IV, collapse = ", ")
family <- as.character(model$family)[1]
convergence_check <- FALSE
normality_check <- FALSE
outlier_check <- FALSE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- FALSE
glm_param <- parameters::parameters(model)
model_summary_df <- glm_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI") %>%
dplyr::select(-'p', dplyr::everything(), "p")
################################################ ANOVA ################################################
} else if (inherits(model, 'aov')) {
# Parameters for output table use
model_type <- "ANOVA"
predict_var <- as.character(attributes(model$terms)$predvars)
DV <- predict_var[2]
IV <- predict_var[c(3:length(predict_var))]
IV <- paste0(IV, collapse = ", ")
family <- NULL
# Assumptions check
convergence_check <- FALSE
normality_check <- TRUE
outlier_check <- TRUE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- FALSE
aov_param <- parameters::parameters(model)
model_summary_df <- aov_param %>%
as.data.frame() %>%
dplyr::select(-'p', dplyr::everything(), "p")
################################################ Linear Regression ################################################
} else if (inherits(model, 'lm')) {
# Parameters for output table use
model_type <- "Linear regression"
predict_var <- as.character(attributes(model$terms)$predvars)
DV <- predict_var[2]
IV <- predict_var[c(3:length(predict_var))]
IV <- paste0(IV, collapse = ", ")
family <- NULL
# Assumptions check
convergence_check <- FALSE
normality_check <- TRUE
outlier_check <- TRUE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- FALSE
lm_param <- parameters::parameters(model)
model_summary_df <- lm_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI") %>%
dplyr::select(-'p', dplyr::everything(), "p")
################################################ Non-tested situations ################################################
} else {
model_type <- "Unable to Determined for Unknown Model"
DV <- "Unable to Determined for Unknown Model"
IV <- "Unable to Determined for Unknown Model"
convergence_check <- TRUE
normality_check <- TRUE
outlier_check <- TRUE
autocorrelation_check <- TRUE
heteroscedasticity_check <- TRUE
collinearity_check <- TRUE
singular_check <- TRUE
other_param <- parameters::parameters(model)
model_summary_df <- other_param %>%
as.data.frame() %>%
dplyr::rename(df = 'df_error') %>%
dplyr::rename(ci.lower = 'CI_low') %>%
dplyr::rename(ci.upper = 'CI_high') %>%
dplyr::select(-"CI")
warning(
"This model is not formally supported. Please proceed with cautious. The model is passed to parameters::parameters() to extract relevant parameters"
)
}
performance_warning <-
utils::capture.output(model_performance_df <-
performance::model_performance(model))
if (length(performance_warning) > 0) {
warning(performance_warning)
}
colnames(model_performance_df) <-
stringr::str_replace_all(
pattern = "R2",
replacement = "R^2",
string = colnames(model_performance_df)
)
colnames(model_performance_df) <-
stringr::str_replace_all(
pattern = "Sigma",
replacement = "$sigma$",
string = colnames(model_performance_df)
)
################################################ Output Table ################################################
if (quite == FALSE) {
# check whether quite the entire output table
cat("\n \n")
super_print("underline|Model Summary")
super_print("Model Type = {model_type}")
super_print("Outcome = {DV}")
super_print("Predictors = {IV}")
if (!is.null(family)) {
super_print("Family = {family}")
}
super_print("\n")
# super_print model estimates table
super_print("underline|Model Estimates")
print_table(model_summary_df)
super_print("\n")
# super_print model performance table
super_print("underline|Goodness of Fit")
print_table(model_performance_df)
if (streamline == FALSE) {
# Check assumption
super_print("\n")
super_print("underline|Model Assumption Check")
if (convergence_check == TRUE) {
try({
convergence_output <- performance::check_convergence(model)
if (convergence_output[[1]] == TRUE) {
super_print("green|OK: Model is converged")
} else {
gradient <- attributes(convergence_output)$gradient
super_print("red|Warning: Model is not converged with gradient of {gradient}")
}
})
}
if (singular_check == TRUE) {
try({
singular_output <- performance::check_singularity(model)
if (singular_output == TRUE) {
super_print("red|Warning: Singularity is detected. See ?lme4::isSingular()")
} else {
super_print("green|OK: No singularity is detected")
}
})
}
if (autocorrelation_check == TRUE) {
tryCatch({
print(performance::check_autocorrelation(model))
super_print("\n")
},
error = function(cond) {
warning("Unable to check autocorrelation. Perhaps change na.action to na.omit")
})
}
if (normality_check == TRUE) {
# first check_normality, if failed, fallback to check_distribution, if failed, super_print failed message
tryCatch(
suppressMessages(print(
performance::check_normality(model)
)),
error = function(cond) {
tryCatch({
# fall back to check_distribution
dist_prob <- performance::check_distribution(model)
norm_dist_pos <-
which(dist_prob$Distribution == "normal")
residual_norm_prob <-
round(dist_prob$p_Residuals[norm_dist_pos] * 100, 0)
response_norm_prob <-
round(dist_prob$p_Response[norm_dist_pos] * 100, 0)
norm_prob <-
c(residual_norm_prob, response_norm_prob)
if (all(norm_prob >= 80)) {
residual_norm_prob <- paste(norm_prob[1], "%", sep = "")
response_norm_prob <-
paste(norm_prob[2], "%", sep = "")
super_print(
"green|OK. No non-normality is detected. Normal distribution proability: residual ({residual_norm_prob}) and response ({response_norm_prob}). check_normality() failed use fallback"
)
} else if (any(norm_prob < 80) &
all(norm_prob > 50)) {
residual_norm_prob <- paste(norm_prob[1], "%", sep = "")
response_norm_prob <-
paste(norm_prob[2], "%", sep = "")
super_print(
"yellow|Cautious: Moderate non-normality is detected. Normal distribution proability: residual ({residual_norm_prob}) and response ({response_norm_prob}). check_normality() failed use fallback"
)
} else if (any(norm_prob <= 50)) {
residual_norm_prob <- paste(norm_prob[1], "%", sep = "")
response_norm_prob <-
paste(norm_prob[2], "%", sep = "")
super_print(
"red|Warning: Severe non-normality is detected. Normal distribution proability: residual ({residual_norm_prob}) and response ({response_norm_prob}). check_normality() failed use fallback"
)
}
},
error = function(cond) {
super_print("blue|Unable to check normality. All fallback failed.")
})
}
)
}
if (outlier_check == TRUE) {
tryCatch(
print(performance::check_outliers(model)),
error = function(cond) {
super_print("blue|Unable to check autocorrelation. Try changing na.action to na.omit.")
}
)
}
if (heteroscedasticity_check == TRUE) {
try(print(performance::check_heteroscedasticity(model)))
}
if (collinearity_check == TRUE) {
try({
is_interaction = insight::find_interactions(model)
if (is.null(is_interaction)) {
collinearity_df <- performance::check_collinearity(model)
if (all(collinearity_df$VIF < 5)) {
super_print("green|OK: No multicolinearity detected (VIF < 5)")
} else if (any(collinearity_df$VIF >= 5) &
all(collinearity_df$VIF < 10)) {
super_print(
"yellow|Cautious: Moderate multicolinearity detected (5 < VIF < 10). Please inspect the following table to identify high correlation factors."
)
super_print("underline|Multicollinearity Table ")
print_table(collinearity_df)
} else if (any(collinearity_df$VIF > 10)) {
super_print(
"red|Warning: Severe multicolinearity detected (VIF > 10). Please inspect the following table to identify high correlation factors."
)
super_print("underline|Multicollinearity Table ")
print_table(collinearity_df)
}
} else{
super_print(
'Multicollinearity is not checked for models with interaction terms. You may check multicollinearity among predictors of a model without interaction terms'
)
}
})
}
}
} # quite stop here
# Check assumption plot
if (assumption_plot == TRUE) {
if (all(unlist(lapply(
c("gridExtra", "qqplotr", "see"), requireNamespace
)))) {
assumption_plot <- tryCatch({
assumption_plot <- suppressMessages(performance::check_model(model))
print(assumption_plot)
},
error = function(cond) {
warning("assumption_plot does not support this model type")
warning(cond)
return(NULL)
})
} else {
stop(
"Please install.packages(c('gridExtra','qqplotr','see')) to use assumption_plot"
)
}
} else {
assumption_plot <- NULL
}
cat("\n")
if (return_result == TRUE) {
return_list <- list(
model_summary = model_summary_df,
model_performance_df = model_performance_df,
assumption_plot = assumption_plot
)
return(return_list)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.