Nothing
#' @title Multiple Linear Regression Full Model Diagnostics
#'
#' @description
#' Fits a multiple linear regression model and provides detailed diagnostics
#' including ANOVA table, multicollinearity, heteroscedasticity, normality test,
#' and diagnostic plots.
#'
#' @param data A data frame containing dependent variable (y) and independent variables (x's)
#'
#' @return A list containing:
#' \itemize{
#' \item model_summary: Summary of regression model
#' \item anova_table: ANOVA table (SSR, SSE, SST)
#' \item vif: Variance Inflation Factor values
#' \item gq_test: Goldfeld-Quandt test result
#' \item shapiro_test: Shapiro-Wilk normality test result
#' \item actual_vs_fitted: Data frame of actual vs fitted values
#' }
#'
#' @importFrom stats lm resid predict shapiro.test pf
#' @importFrom car vif
#' @importFrom lmtest gqtest
#' @export
RegAnalysis <- function(data) {
if (!is.data.frame(data)) stop("data must be a data frame")
if (!all(sapply(data, is.numeric))) stop("All variables must be numeric")
n <- nrow(data)
y <- data[[1]]
x_vars <- names(data)[-1]
formula <- as.formula(paste(names(data)[1], "~", paste(x_vars, collapse = "+")))
model <- stats::lm(formula, data = data)
model_summary <- summary(model)
model_summary
SST <- sum((y - mean(y))^2)
SSE <- sum(resid(model)^2)
SSR <- SST - SSE
df_total <- n - 1
df_reg <- length(x_vars)
df_res <- n - df_reg - 1
MSR <- SSR / df_reg
MSE <- SSE / df_res
F_value <- MSR / MSE
p_value <- stats::pf(F_value, df_reg, df_res, lower.tail = FALSE)
anova_table <- data.frame(
Source = c("Regression", "Error", "Total"),
DF = c(df_reg, df_res, df_total),
SS = round(c(SSR, SSE, SST), 3),
MS = round(c(MSR, MSE, NA), 3),
F = round(c(F_value, NA, NA), 3),
"Pr(>F)" = c(round(p_value, 5), NA, NA)
)
anova_table
if(length(x_vars) > 1){
vif_vals <- car::vif(model)
round(vif_vals, 3)
} else {
vif_vals <- NA
message("\nOnly one predictor -> VIF not applicable\n")
}
gq <- lmtest::gqtest(model)
gq
shapiro <- stats::shapiro.test(resid(model))
shapiro
fitted <- stats::predict(model)
actual_vs_pred <- data.frame(
Actual = round(y, 2),
Fitted = round(fitted, 2)
)
actual_vs_pred
oldpar <- par(no.readonly = TRUE)
on.exit(par(oldpar))
par(family = "serif", font = 2,
mfrow = c(2,3),
cex.lab = 1.2,
cex.axis = 1.1,
cex.main = 1.3)
plot(model)
r2 <- model_summary$r.squared
plot(y, fitted,
xlab = "Actual Values",
ylab = "Fitted Values",
main = "Actual vs Fitted",
pch = 19)
abline(0, 1, col = "red", lwd = 2)
text(x = min(y) + 0.05*diff(range(y)),
y = max(fitted) - 0.05*diff(range(fitted)),
labels = bquote(bold(R^2 == .(round(r2,2)))),
col = "blue", adj = c(0,1), cex = 1.3)
plot(fitted, resid(model),
xlab = "Fitted Values",
ylab = "Residuals",
main = "Residuals vs Fitted",
pch = 19)
abline(h = 0, col = "blue", lwd = 2)
graphics::par(mfrow = c(1,1))
return(list(
model_summary = model_summary,
anova_table = anova_table,
vif = vif_vals,
gq_test = gq,
shapiro_test = shapiro,
actual_vs_fitted = actual_vs_pred
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.