#' A multiple linear regression function.
#'
#' Multiple linear regression function which returns the coeficients (the regression weights) of the independent variables with their error,
#' t-value and p-value also an a estimate for a residual variance and the degrees of freedom in the model.
#' The description of the linear model can be found here: \url{https://en.wikipedia.org/wiki/Regression_analysis}
#'
#' @field formula: an object of class object, the model to fitted
#' @field data: an data frame, contain the variables in the model
#' @field coefficients: an matrix, regressions coefficients
#' @field fitted_values: an matrix, fitted values
#' @field residuals: an matrix, the residulas
#' @field df: an integer, the degrees of freedom
#' @field residual_var: numeric, the residual variance
#' @field coefficients_var: numeric, the variance of the regression coefficients
#' @field t_values: an matrix, the t-values for each coefficient
#'
#' @import methods
#' @export linreg
#' @exportClass linreg
linreg <- setRefClass("linreg",
fields = list(
formula = "formula",
data = "character",
coefficients = "matrix",
fitted_values = "matrix",
residuals = "matrix",
df = "integer",
residual_var = "numeric",
coefficients_var = "numeric",
t_values = "matrix"
),
methods = list(
##### initialize all fields #####
initialize = function(formula, data) {
"Initialize all regression variables"
if (class(formula) != "formula") {
stop("invalid formula")
}
if (class(data) != "data.frame") {
stop("invalid data, data has to be data.frame")
}
# independent variables
X <- model.matrix(formula, data)
# dependent variables
y <- data[, all.vars(formula)[1]]
# QR decomposition
qr_factor <- qr(X)
qr_Q <- qr.Q(qr_factor)
qr_R <- qr.R(qr_factor)
# Regressions coefficients, βˆ = inverse(R) t(Q) yˆ (where X = QR)
coefficients <<- solve(qr_R) %*% t(qr_Q) %*% y
# Fitted Values, yˆ = X βˆ
fitted_values <<- X %*% coefficients
# The residuals, eˆ = y − yˆ = y − X βˆ
residuals <<- y - fitted_values
# The degrees of freedom, df = n − k - 1
# n:number of observations, k:number of independent variables
df <<- nrow(data) - ncol(X)
# The residual variance, σˆ2 = eTe/df
residual_var <<- as.numeric((t(residuals) %*% residuals) / df)
# The variance of the regression coefficients
coefficients_var <<- residual_var * diag(solve(qr_R) %*% solve(t(qr_R)))
# The t-values for each coefficient
t_values <<- coefficients / sqrt(coefficients_var)
formula <<- formula
data <<- deparse(substitute(data))
},
#############################################
##### print function #####
print = function() {
"Show linreg formula and coefficients of indepentends"
cat("Call:\n")
cat(paste0("linreg(formula = ", format(formula),
", data = ", data, ")\n\n"))
cat("Coefficients:\n")
cat(row.names(coefficients)[1])
for (i in 2:(length(coefficients))) {
cat(sprintf("%20s", row.names(coefficients)[i]))
}
cat("\n ")
cat(round(coefficients[1], 3))
for (i in 2:(length(coefficients))) {
coef <- as.character(round(coefficients[i], 3))
cat(sprintf("%20s", coef))
}
},
#############################################
##### plot function #####
plot = function() {
"Show residuals vs fitted plot and scale-location plot"
# data.frame for plot variables
plot.df <- data.frame(fitted_values = fitted_values,
residuals = residuals,
standar_residuals = (abs(residuals/(residual_var^(1/2))))^(1/2))
# Residuals vs Fitted
p1 <- ggplot2::ggplot(plot.df, ggplot2::aes(x = fitted_values, y = residuals)) +
ggplot2::theme_classic() +
ggplot2::theme(axis.title = ggplot2::element_text(face = "bold"), # axis names size
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5),
panel.border = ggplot2::element_rect(fill = NA, size = 1)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(method = "loess", se = FALSE, col = "red", lwd = 0.5) +
ggplot2::geom_hline(yintercept = 0, linetype = "dotted", color = "grey") +
ggplot2::scale_x_continuous(name = paste0("Fitted Values\nlinreg(", format(formula), ")")) +
ggplot2::scale_y_continuous(name = "Residuals") +
ggplot2::ggtitle("Residuals vs Fitted")
# Scale-Location
p2 <- ggplot2::ggplot(plot.df, ggplot2::aes(x = fitted_values, y = standar_residuals)) +
ggplot2::theme_classic() +
ggplot2::theme(axis.title = ggplot2::element_text(face = "bold"), # axis names size
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5),
panel.border = ggplot2::element_rect(fill = NA, size = 1)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(method = "loess", se = FALSE, col = "red", lwd = 0.5) +
ggplot2::scale_x_continuous(name = paste0("Fitted Values\nlinreg(", format(formula), ")")) +
ggplot2::scale_y_continuous(name = expression(sqrt(abs("Standar residuals")))) +
ggplot2::ggtitle("Scale-Location")
gridExtra::grid.arrange(p1, p2)
},
#############################################
##### resid function #####
resid = function() {
"Return residuals"
return(as.vector(residuals))
},
#############################################
##### pred function #####
pred = function() {
"Return fitted values"
return(as.vector(fitted_values))
},
#############################################
##### coef function #####
coef = function() {
"Return coeficients"
coef <- as.vector(coefficients)
names(coef) <- row.names(coefficients)
return(coef)
},
#############################################
##### summary function #####
summary = function() {
"Show resituals, coefficients with their estimate, standard error, t-value and p-value"
##### Calls #####
cat("Call:\n")
cat(paste0("linreg(formula = ", format(formula),
", data = ", data, ")\n\n"))
#############################################
##### Residuals #####
cat("Residuals:\n")
residual_quan <- round(quantile(residuals), 3)
cat(format(c("Min", "1Q", "Median", "3Q", "Max"), width = 10, justify = "right"))
cat("\n")
cat(format(residual_quan, width = 10, justify = "right"))
#############################################
##### Coefficients #####
cat("\n")
cat("\nCoefficients:\n")
estimate <- round(coefficients, 3)
std_error <- round(sqrt(as.numeric(coefficients_var)), 3)
t_value <- round(t_values, 3)
p_value <- round(2*pt(-abs(t_values), df = nrow(residuals)-1), 6)
# signif marks
signif <- c()
for (i in p_value) {
if (i < 0.001) {
signif <- c(signif, "***")
next()
}
if (i > 0.001 & i < 0.01) {
signif <- c(signif, "**")
next()
}
if (i > 0.01 & i < 0.05) {
signif <- c(signif, "*")
next()
}
if (i > 0.05 & i < 0.1) {
signif <- c(signif, ".")
next()
} else {
signif <- c(signif, " ")
}
}
coef_df <- data.frame(estimate, std_error, t_value, p_value, signif)
coef_df$signif <- as.character(coef_df$signif)
width <- max(nchar(row.names(coef_df)))
cat(" ")
cat(format(c("Estimate", "Std. Error", "t value", "Pr(>|t|)"), width = width, justify = "right"))
cat("\n")
for (i in 1:nrow(coef_df)) {
cat(format(c(rownames(coef_df)[i], coef_df[i, ]), width = width, justify = "right"))
cat("\n")
}
#############################################
##### Residual standard error #####
cat("\n")
cat(paste0("Residual standard error: ",
round(sqrt(residual_var), 3),
" on ", df, " degrees of freedom \n"))
#############################################
}
)
)
#' @importFrom ggplot2 ggplot
#' @importFrom gridExtra grid.arrange
NULL
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.