#' A Reference Class to represent the regression model.
#' @field formula A character object representing the formula.
#'
#' @field data A string representing the name of the data set implemented to build the model.
#'
#' @field Coeffficients A named numeric vector representing the regressin coefficients of
#' the model parameters.
#' @field Fitted_Values A matrix containing the predicted values of the model.
#' @field Residuals A matrix consisting the residual values of the models.
#' @field Standard_Residuals a matrix containing the standardized residuals.
#' @field Sqrt_Standard_Residuals A matrix including sqrt of the standardized residuals.
#' @field degrees_of_freedom A numeric representing the degrees of freedom.
#' @field Residual_Variance A matrix consisting the variance of the residuals.
#' @field Coefficients_Variance A numeric vector representing the variances
#' of the coefficients.
#' @field t_value A numeric vector representing the t-value.
#' @field p-value A numeric vector representing the p-value.
#'
#' @import methods
#'
#' @seealso [https://en.wikipedia.org/wiki/Linear_regression]
#' @examples linreg_mod <- linreg$new(Petal.Length~Sepal.Width+Sepal.Length, data=iris)
#' @export
#' @export "linreg"
linreg <- setRefClass("linreg",
fields = list(
Data = "character",
Call = "character",
paramNames = "character",
Coefficients = "numeric",
Fitted_Values = "matrix",
Residuals = "matrix",
Standard_Residuals = "matrix",
Sqrt_Standard_Residuals = "matrix",
degrees_of_freedom = "numeric",
Residual_Variance = "matrix",
Coefficients_Variance = "numeric",
stdErrorCoef = "numeric",
t_value = "numeric",
p_value = "numeric",
Plot1 = "data.frame",
Plot2 = "data.frame"
),
methods = list(
#' @import stats
#' @param formula A formula indicating the dependent and independent variables.
#' @param data The data set from which values of observations are extracted
#' based on the formula specification, for building the regression model.
#'
initialize = function(formula, data) {
"Initializing Function. A function to create a multiple linear regression model."
Data <<- deparse(substitute(data))
all_var <- all.vars(formula)
dep_var <- all_var[1]
ind_var <- all_var[2:length(all_var)]
Call <<- paste("linreg(formula =", paste(format(formula),",", sep = ""), "data =", paste(Data,")", sep = ""))
X <- model.matrix(formula, data = data )
paramNames <<- colnames(X)
inverse <- solve(t(X) %*% X)
y <- data[[dep_var]]
Coefficients <<- as.vector(inverse %*% (t(X) %*% y))
names(Coefficients) <<- colnames(X)
Fitted_Values <<- X %*% Coefficients
Residuals <<- y - Fitted_Values
Standard_Residuals <<- ((Residuals - mean(Residuals))/sd(Residuals))
Sqrt_Standard_Residuals <<- sqrt(abs(Standard_Residuals))
n <- length(data[[dep_var]])
p <- length(Coefficients)
degrees_of_freedom <<- n - p
Residual_Variance <<- (t(Residuals) %*% Residuals)/degrees_of_freedom
Coefficients_cov <- (inverse * as.vector(Residual_Variance))
Coefficients_Variance <<- diag(Coefficients_cov)
stdErrorCoef <<- as.vector(sqrt(Coefficients_Variance))
t_value <<- (Coefficients / sqrt(Coefficients_Variance))
p_value <<- (2*pt(-abs(t_value), df = degrees_of_freedom))
Plot1 <<- data.frame(Nrow = 1:length(Fitted_Values), Fitted_Values = Fitted_Values, Residuals = Residuals )
Plot2 <<- data.frame(Nrow = 1:length(Fitted_Values), Fitted_Values = Fitted_Values, Sqrt_Standard_Residuals = Sqrt_Standard_Residuals)
return(.self)
},
print = function() {
"Prints out the coefficients and coefficient names,
in addition to the call of the function."
cat("\nCall:\n", Call,"\n\n")
cat("Coefficients:\n")
print.default(format(Coefficients, digits = 5), print.gap = 2L, quote = FALSE)
},
#' @import ggplot2
#' @import gridExtra
#'
plot = function(){
"Plots two different plots. The fiitted values vs the residuals,
and the fiitted values vs square root of standardized residuals."
p1 = ggplot(data = Plot1, mapping = aes(x = Fitted_Values, y = Residuals, label = Nrow)) +
geom_point(color = "black", size = 3, shape = 1) +
geom_smooth(method = "loess", formula = y ~ cumsum(x), color = "red")+
geom_label(data = subset(Plot1, Nrow == 99 | Nrow == 118 | Nrow == 119))+
xlab(paste("Fitted values\n", "linreg(", format(formula),")")) +
ylab("Residuals")+
ggtitle("Residuals vs Fitted") +
theme(plot.title = element_text(hjust = 0.5))
p2 = ggplot(data = Plot2, mapping = aes(x = Fitted_Values, y = Sqrt_Standard_Residuals, label = Nrow))+
geom_point(color = "black", size = 3, shape = 1) +
stat_summary(fun.y = mean, col = "red", geom = "line", size = 1)+
geom_label(data = subset(Plot2, Nrow == 99 | Nrow == 118 | Nrow == 119))+
xlab(paste("Fitted values\n", "linreg(", format(formula),")")) +
ylab(expression(sqrt(abs("Standardized residuals")))) +
ggtitle("Scale-Location") +
theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1, p2, nrow=2)
},
#'
resid = function(){
"Returns a vector of residual values."
return(as.vector(Residuals))
},
#'
pred = function(){
"Returns a the predicted values."
return(as.vector(Fitted_Values))
},
#'
coef = function(){
"Returns the coefficients."
return(Coefficients)
},
#'
summary = function(){
"Returns the coefficients with their standard error,
t-value and p-value as well as the estimate of standard
error and the degrees of freedom in the model."
cat("\nCall:\n", Call,"\n\n")
cat("Coefficients:\n")
cat(" ", "Estimate", " Std.Error", " t.value", " p.value", "\n")
for(i in seq_along(Coefficients)){
cat(paramNames[i], format(Coefficients[i], digits = 5), format(stdErrorCoef[i], digits = 5),
format(t_value[i], digits = 5), paste(format(p_value[i], digits = 5), "***"), sep = " ")
cat("\n")
}
cat("---")
cat("\n")
cat("Residual standard error:", round(sqrt(Residual_Variance), digits = 4),
"on", degrees_of_freedom, "degrees of freedom" )
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.