In Philhoels/Lab4AdvR: Linear Regression Package

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

This is a linear regression package in R

In this package are following functions inplemented print(), plot(), resid(), pred(), coef() and summary().

A demonstration will follow later in on this page.

First of all, this package empowers you to create linreg objects. This is a references class (RC) object.

For all demonstrations is the data set "iris" used.

Create an object "mod_object"

mod_object <- linreg(Petal.Length~Species, data = iris)

We created an linreg object ob the object type RC. Used the as input data "iris" and used "Petal.Length" as dependent variable and as "Species" independent variable of the linear regression.

With the created object you can create followed calculations:

Regressions coefficients - regressions_coef()
fitted values - fitted_values()
residuals -resi()
degrees of freedom - dof()
residual variance - resi_var()
variance of the regression coefficients - regressions_var()
t-values for each coefficient - t_value()

Example:

mod_object$regressions_coef

Methods

Like already maintained are some methods implemented.

How to use the methods:

print() print out the coefficients and coefficient names, similar as done by the lm class.

mod_object$print()

plot() it will plot the two plots using ggplot2.

mod_object$plot()

resid() it returns the vector of residuals e.

mod_object$resid()

pred() it returns the predicted values yˆ.

mod_object$pred()

coef() it returns the coefficients as a named vector.

mod_object$coef()

summary() it returns a similar printout as printed for lm objects, but only present the coefficients with their standard error, t-value and p-value as well as the estimate of σˆ and the degrees of freedom in the model.

mod_object$summary()

The Code

linreg <- setRefClass("linreg",
                      fields = list(
                        X = "matrix",
                        y = "matrix",
                        regressions_coef = "matrix",
                        fitted_values = "matrix",
                        resi = "matrix",
                        n = "numeric",
                        p = "numeric",
                        dof = "numeric",
                        regressions_var = "matrix",
                        resi_var = "matrix",
                        t_value = "matrix",
                        m_formula ="formula",
                        m_data = "character"),

#-------------------------------------------------------------------------
                      methods = list
                      (
                        #This function will calculate necessary parameters
                        initialize = function(formula, data)
                        {
                          #Generate X and y
                          X <<- model.matrix(formula, data)
                          y <<- as.matrix(data[all.vars(formula)[1]])

                          #Regressions coeficients
                          regressions_coef<<- as.matrix(solve(t(X)%*%X) %*% t(X)%*%y)

                          fitted_values <<- X%*%regressions_coef

                          #Residuals
                          resi <<- y - fitted_values

                          #Degrees of fredom
                          n <<- length(X[,1])
                          p <<- length(X[1,])
                          dof <<- n - p

                          #Variance of the regression coeffcients
                          resi_var <<- (t(resi) %*% resi)/dof
                          regressions_var <<- as.numeric(resi_var) * solve(t(X) %*% X)

                          t_value <<- regressions_coef/as.double(sqrt(resi_var))

                          #Metadata
                          m_formula <<- formula
                          m_data <<- deparse(substitute(data))
                        },

                        #The print method
                        print = function() {
                          cat(paste("Call: \n"))
                          cat(paste("linreg(formula = ",format(m_formula), ", data = ", m_data, ")\n\n", sep = ""))
                          cat(paste("Coefficients:\n"))

                          coef <- structure(as.vector(regressions_coef), names= format(rownames(regressions_coef)))
                          my_print(coef)
                        },

                        #The plot method
                        plot = function()
                        {
                          library(ggplot2)

                          linkoping_theme <-
                            theme(
                              plot.margin = unit(c(1.2,1.2,1.2,1.2), "cm"),
                              panel.background = element_rect(fill="#3dd2dc"),
                              panel.grid.major.y = element_blank(),
                              panel.grid.minor.y = element_blank(),
                              panel.grid.major.x = element_blank(),
                              panel.grid.minor.x = element_blank(),
                              axis.line = element_line(color= "#58585b", size=1),
                              axis.text.x = element_text(color="#1d1d1b", size="11"),
                              axis.text.y = element_text(color="#1d1d1b", size="11"),
                              axis.title.x = element_text(color="Black", size="12", face="bold.italic"),
                              axis.title.y = element_text(color="Black", size="12", face="bold.italic"),
                              axis.ticks.y = element_blank(),
                              axis.ticks.x = element_line(color = "#9D9ADC", size = 0.3),
                              plot.title = element_text(color="blue2", face="bold", size="14"),
                              legend.position="bottom", legend.title = element_blank(),
                              legend.text = element_text(color="Black", size="12")
                            )

                          #Plot 1
                          residuals_vs_fitted <- ggplot(data.frame(resi, fitted_values), aes(x=fitted_values, y=resi)) +
                            geom_point() +
                            stat_smooth(method='lm', colour="red", se=FALSE, span = 1) +
                            xlab(paste("Fitted Values\n", "linreg(", format(m_formula), ")", ""))+
                            ylab("Residuals") +
                            ggtitle("Residuals vs Fitted") +
                            linkoping_theme

                          #Print plot 1
                          my_print(residuals_vs_fitted)

                          #--------------------------------
                          #Prepare for plot 2
                          std_vs_fitted <- as.data.frame(cbind(sqrt(abs(resi-mean(resi))), fitted_values))
                          names(std_vs_fitted) = c("Standardized_residuals", "fitted_values")
                          y_plot <- std_vs_fitted[,1]

                          #Plot 2
                          plot2 <- ggplot(std_vs_fitted, aes(x = fitted_values, y = y_plot))+
                            geom_point() +
                            stat_smooth(method='lm', colour="red", se=FALSE, span = 1) +
                            xlab(paste("Fitted Values\n", "linreg(", format(m_formula), ")", ""))+
                            ylab(expression(sqrt("|Standardized residuals|"))) +
                            ggtitle("Scale Location") +
                            linkoping_theme

                          #Print plot2
                          my_print(plot2)
                        },
                        resid = function(){
                          return(resi)
                        },
                        pred = function()
                        {
                          return(fitted_values)
                        },
                        coef = function(){
                          coef <- structure(as.vector(regressions_coef), names= row.names(regressions_coef))
                          return(coef)
                        },

                        #Summary method
                        summary = function()
                        {
                          cat(paste("Call: \n\n"))
                          cat(paste("linreg(formula = ",format(m_formula), ", data = ", m_data, ")\n\n", sep = ""))
                          cat(paste("Coefficients:\n\n"))

                          regressions_var_diagonal <- diag(regressions_var)
                          table = data.frame(matrix(ncol = 5, nrow = 0))
                          for (i in 1:length(regressions_coef))
                          {
                            this_t_value = regressions_coef[i]/sqrt(regressions_var_diagonal[i])
                            this_p_value = 2*pt(abs(this_t_value), dof, lower.tail = FALSE)
                            row = data.frame(round(regressions_coef[i], 2), round(sqrt(regressions_var[i, i]), 2), round(this_t_value, 2), formatC(this_p_value, format = "e", digits = 2), write_star(this_p_value))
                            row.names(row)[1] = row.names(regressions_coef)[i]
                            table = rbind(table, row)
                          }
                          colnames(table) = c("Estimate", "Std. Error", "t value", "Pr(>|t|)", "")
                          my_print(table)

                          cat(paste("\n"))
                          cat(paste("Residual standard error: ",round(sd(resi),3), " on ", dof, " degrees of freedom", sep = ""))
                        }

                      )
)

#'my_print function
#'Our own print function, because we can't call the default function inside RC object
#'@examples print("This is an example!")
#'@param x is the input.

my_print = function(x) {
    print(x)
}

#'write_star function
#'generate the start base on p_value, follow the rule of lm function
#'@param p_value the input
write_star = function(p_value) {
  if (p_value > 0.1)
    output <- ""
  else if (p_value > 0.05)
    output <- "."
  else if (p_value > 0.01)
    output <- "*"
  if (p_value > 0.001)
    output <- "**"
  else  output <- "***"
  return(output)
}

#How to use:
#1. Run this file (both linreg object and the my_print, write_star function)
#2. mod_object <- linreg(Petal.Length~Species, data = iris)
#3. mod_object$print()
#4. mod_object$plot()
#5. mod_object$summary()

#a <- linreg$new(Petal.Length~Sepal.Width+Sepal.Length, data = iris)