R/lin-reg-model.R

#' A Reference Class to represent the regression model.    
#' @field formula A character object representing the formula.    
#'    
#' @field data A string representing the name of the data set implemented to build the model.    
#'    
#' @field Coeffficients A named numeric vector representing the regressin coefficients of
#'    the model parameters.  
#' @field Fitted_Values A matrix containing the predicted values of the model.    
#' @field Residuals A matrix consisting the residual values of the models.  
#' @field Standard_Residuals a matrix containing the standardized residuals. 
#' @field Sqrt_Standard_Residuals A matrix including sqrt of the standardized residuals.
#' @field degrees_of_freedom A numeric representing the degrees of freedom.
#' @field Residual_Variance A matrix consisting the variance of the residuals.
#' @field Coefficients_Variance A numeric vector representing the variances 
#'    of the coefficients.
#' @field t_value A numeric vector representing the t-value.
#' @field p-value A numeric vector representing the p-value. 
#' 
#' @import methods 
#' 
#' @seealso [https://en.wikipedia.org/wiki/Linear_regression]
#' @examples linreg_mod <- linreg$new(Petal.Length~Sepal.Width+Sepal.Length, data=iris)
#' @export 
#' @export "linreg"

 
  linreg <- setRefClass("linreg",
               fields = list(
                 
                 Data = "character",
                 Call = "character",
                 paramNames = "character",
                 Coefficients = "numeric",
                 Fitted_Values = "matrix",
                 Residuals = "matrix",
                 Standard_Residuals = "matrix",
                 Sqrt_Standard_Residuals = "matrix",
                 degrees_of_freedom = "numeric",
                 Residual_Variance = "matrix",
                 Coefficients_Variance = "numeric",
                 stdErrorCoef = "numeric",
                 t_value = "numeric",
                 p_value = "numeric",
                 Plot1 = "data.frame",
                 Plot2 = "data.frame"
                 ),
               
              methods = list(
               #' @import stats
               #' @param formula A formula indicating the dependent and independent variables.
               #' @param data The data set from which values of observations are extracted
               #'     based on the formula specification, for building the regression model.
               #' 
               initialize = function(formula, data) {
                  "Initializing Function. A function to create a multiple linear regression model."
                  
                  Data <<- deparse(substitute(data))
                  all_var <- all.vars(formula)
                  dep_var <- all_var[1]
                  ind_var <- all_var[2:length(all_var)]
                  
                  Call <<- paste("linreg(formula =", paste(format(formula),",", sep = ""), "data =", paste(Data,")", sep = ""))

                  X <- model.matrix(formula, data = data )
                  paramNames <<- colnames(X)
                  
                  inverse <- solve(t(X) %*% X)

                  y <- data[[dep_var]]

                  Coefficients <<- as.vector(inverse %*% (t(X) %*% y))
                  names(Coefficients) <<- colnames(X)

                  Fitted_Values <<- X %*% Coefficients

                  Residuals <<- y - Fitted_Values
                  Standard_Residuals <<- ((Residuals - mean(Residuals))/sd(Residuals))
                  Sqrt_Standard_Residuals <<- sqrt(abs(Standard_Residuals))
                  

                  n <- length(data[[dep_var]])
                  p <- length(Coefficients)
                  degrees_of_freedom <<- n - p
                  
               
                  Residual_Variance <<- (t(Residuals) %*% Residuals)/degrees_of_freedom
                  Coefficients_cov <- (inverse * as.vector(Residual_Variance))
                  Coefficients_Variance <<- diag(Coefficients_cov)
                  stdErrorCoef <<- as.vector(sqrt(Coefficients_Variance))
                  
                  t_value <<- (Coefficients / sqrt(Coefficients_Variance))
                  p_value <<- (2*pt(-abs(t_value), df = degrees_of_freedom))
                  
                  
                  Plot1 <<- data.frame(Nrow = 1:length(Fitted_Values), Fitted_Values = Fitted_Values, Residuals = Residuals )
                  Plot2 <<- data.frame(Nrow = 1:length(Fitted_Values), Fitted_Values = Fitted_Values, Sqrt_Standard_Residuals = Sqrt_Standard_Residuals)
                  return(.self)
                },
               
              
                print = function() {
                  "Prints out the coefficients and coefficient names,
                   in addition to the call of the function."
                  
                  cat("\nCall:\n", Call,"\n\n")
                  
                  cat("Coefficients:\n")
                  
                  print.default(format(Coefficients, digits = 5), print.gap = 2L, quote = FALSE)
                  
                  },
               
               #' @import ggplot2
               #' @import gridExtra
               #'
                plot = function(){
                  "Plots two different plots. The fiitted values vs the residuals,
                  and the fiitted values vs square root of standardized residuals."
                  
                  p1 = ggplot(data = Plot1, mapping = aes(x = Fitted_Values, y = Residuals, label = Nrow)) +
                    geom_point(color = "black", size = 3, shape = 1) +
                    geom_smooth(method = "loess", formula = y ~ cumsum(x), color = "red")+
                    geom_label(data = subset(Plot1, Nrow == 99 | Nrow == 118 | Nrow == 119))+
                    xlab(paste("Fitted values\n", "linreg(", format(formula),")")) +
                    ylab("Residuals")+
                    ggtitle("Residuals vs Fitted") +
                    theme(plot.title = element_text(hjust = 0.5))
                    

                       
                  p2 = ggplot(data = Plot2, mapping = aes(x = Fitted_Values, y = Sqrt_Standard_Residuals, label = Nrow))+
                    geom_point(color = "black", size = 3, shape = 1) +
                    stat_summary(fun.y = mean, col = "red", geom = "line", size = 1)+
                    geom_label(data = subset(Plot2, Nrow == 99 | Nrow == 118 | Nrow == 119))+
                    xlab(paste("Fitted values\n", "linreg(", format(formula),")")) +
                    ylab(expression(sqrt(abs("Standardized residuals")))) +
                    ggtitle("Scale-Location") +
                    theme(plot.title = element_text(hjust = 0.5))
                  
                  
                  grid.arrange(p1, p2, nrow=2)
                 
                  },
               
               #'
                resid = function(){
                  "Returns a vector of residual values."
                  
                  return(as.vector(Residuals))
                  },
               
               #'
                pred = function(){
                  "Returns a the predicted values."
                  return(as.vector(Fitted_Values))
                  },
               
               #' 
                coef = function(){
                  "Returns the coefficients."
                  return(Coefficients)
                  },
               
               #' 
                summary = function(){
                  "Returns the coefficients with their standard error,
                   t-value and p-value as well as the estimate of standard 
                   error and the degrees of freedom in the model."
                  
                  cat("\nCall:\n", Call,"\n\n")
                  cat("Coefficients:\n")
                  cat("           ", "Estimate", "  Std.Error", "  t.value", " p.value", "\n")
                  
                  for(i in seq_along(Coefficients)){
                    cat(paramNames[i], format(Coefficients[i], digits = 5), format(stdErrorCoef[i], digits = 5),
                        format(t_value[i], digits = 5), paste(format(p_value[i], digits = 5), "***"), sep = "   ")
                    cat("\n")

                  }
                  
                  
                  cat("---")
                  cat("\n")
                  
                  cat("Residual standard error:", round(sqrt(Residual_Variance), digits = 4),
                   "on", degrees_of_freedom, "degrees of freedom" )
                }
                )
              )

  
mpirmoradiyan/lab4a documentation built on Nov. 4, 2019, 7:31 p.m.