knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

This is a linear regression package in R

In this package are following functions inplemented print(), plot(), resid(), pred(), coef() and summary().

A demonstration will follow later in on this page.

First of all, this package empowers you to create linreg objects. This is a references class (RC) object.

Create an object "mod_object"

mod_object <- linreg(Petal.Length~Species, data = iris)

We created an linreg object ob the object type RC. Used the as input data "iris" and used "Petal.Length" as dependent variable and as "Species" independent variable of the linear regression.

With the created object you can create followed calculations:

Example:

mod_object$regressions_coef

Methods

Like already maintained are some methods implemented.

How to use the methods:

mod_object$print()
mod_object$plot()
mod_object$resid()
mod_object$pred()
mod_object$coef()
mod_object$summary()

The Code

linreg <- setRefClass("linreg",
                      fields = list(
                        X = "matrix",
                        y = "matrix",
                        regressions_coef = "matrix",
                        fitted_values = "matrix",
                        resi = "matrix",
                        n = "numeric",
                        p = "numeric",
                        dof = "numeric",
                        regressions_var = "matrix",
                        resi_var = "matrix",
                        t_value = "matrix",
                        m_formula ="formula",
                        m_data = "character"),

#-------------------------------------------------------------------------
                      methods = list
                      (
                        #This function will calculate necessary parameters
                        initialize = function(formula, data)
                        {
                          #Generate X and y
                          X <<- model.matrix(formula, data)
                          y <<- as.matrix(data[all.vars(formula)[1]])

                          #Regressions coeficients
                          regressions_coef<<- as.matrix(solve(t(X)%*%X) %*% t(X)%*%y)

                          fitted_values <<- X%*%regressions_coef

                          #Residuals
                          resi <<- y - fitted_values

                          #Degrees of fredom
                          n <<- length(X[,1])
                          p <<- length(X[1,])
                          dof <<- n - p

                          #Variance of the regression coeffcients
                          resi_var <<- (t(resi) %*% resi)/dof
                          regressions_var <<- as.numeric(resi_var) * solve(t(X) %*% X)

                          t_value <<- regressions_coef/as.double(sqrt(resi_var))

                          #Metadata
                          m_formula <<- formula
                          m_data <<- deparse(substitute(data))
                        },

                        #The print method
                        print = function() {
                          cat(paste("Call: \n"))
                          cat(paste("linreg(formula = ",format(m_formula), ", data = ", m_data, ")\n\n", sep = ""))
                          cat(paste("Coefficients:\n"))

                          coef <- structure(as.vector(regressions_coef), names= format(rownames(regressions_coef)))
                          my_print(coef)
                        },

                        #The plot method
                        plot = function()
                        {
                          library(ggplot2)

                          linkoping_theme <-
                            theme(
                              plot.margin = unit(c(1.2,1.2,1.2,1.2), "cm"),
                              panel.background = element_rect(fill="#3dd2dc"),
                              panel.grid.major.y = element_blank(),
                              panel.grid.minor.y = element_blank(),
                              panel.grid.major.x = element_blank(),
                              panel.grid.minor.x = element_blank(),
                              axis.line = element_line(color= "#58585b", size=1),
                              axis.text.x = element_text(color="#1d1d1b", size="11"),
                              axis.text.y = element_text(color="#1d1d1b", size="11"),
                              axis.title.x = element_text(color="Black", size="12", face="bold.italic"),
                              axis.title.y = element_text(color="Black", size="12", face="bold.italic"),
                              axis.ticks.y = element_blank(),
                              axis.ticks.x = element_line(color = "#9D9ADC", size = 0.3),
                              plot.title = element_text(color="blue2", face="bold", size="14"),
                              legend.position="bottom", legend.title = element_blank(),
                              legend.text = element_text(color="Black", size="12")
                            )

                          #Plot 1
                          residuals_vs_fitted <- ggplot(data.frame(resi, fitted_values), aes(x=fitted_values, y=resi)) +
                            geom_point() +
                            stat_smooth(method='lm', colour="red", se=FALSE, span = 1) +
                            xlab(paste("Fitted Values\n", "linreg(", format(m_formula), ")", ""))+
                            ylab("Residuals") +
                            ggtitle("Residuals vs Fitted") +
                            linkoping_theme

                          #Print plot 1
                          my_print(residuals_vs_fitted)

                          #--------------------------------
                          #Prepare for plot 2
                          std_vs_fitted <- as.data.frame(cbind(sqrt(abs(resi-mean(resi))), fitted_values))
                          names(std_vs_fitted) = c("Standardized_residuals", "fitted_values")
                          y_plot <- std_vs_fitted[,1]

                          #Plot 2
                          plot2 <- ggplot(std_vs_fitted, aes(x = fitted_values, y = y_plot))+
                            geom_point() +
                            stat_smooth(method='lm', colour="red", se=FALSE, span = 1) +
                            xlab(paste("Fitted Values\n", "linreg(", format(m_formula), ")", ""))+
                            ylab(expression(sqrt("|Standardized residuals|"))) +
                            ggtitle("Scale Location") +
                            linkoping_theme

                          #Print plot2
                          my_print(plot2)
                        },
                        resid = function(){
                          return(resi)
                        },
                        pred = function()
                        {
                          return(fitted_values)
                        },
                        coef = function(){
                          coef <- structure(as.vector(regressions_coef), names= row.names(regressions_coef))
                          return(coef)
                        },

                        #Summary method
                        summary = function()
                        {
                          cat(paste("Call: \n\n"))
                          cat(paste("linreg(formula = ",format(m_formula), ", data = ", m_data, ")\n\n", sep = ""))
                          cat(paste("Coefficients:\n\n"))

                          regressions_var_diagonal <- diag(regressions_var)
                          table = data.frame(matrix(ncol = 5, nrow = 0))
                          for (i in 1:length(regressions_coef))
                          {
                            this_t_value = regressions_coef[i]/sqrt(regressions_var_diagonal[i])
                            this_p_value = 2*pt(abs(this_t_value), dof, lower.tail = FALSE)
                            row = data.frame(round(regressions_coef[i], 2), round(sqrt(regressions_var[i, i]), 2), round(this_t_value, 2), formatC(this_p_value, format = "e", digits = 2), write_star(this_p_value))
                            row.names(row)[1] = row.names(regressions_coef)[i]
                            table = rbind(table, row)
                          }
                          colnames(table) = c("Estimate", "Std. Error", "t value", "Pr(>|t|)", "")
                          my_print(table)

                          cat(paste("\n"))
                          cat(paste("Residual standard error: ",round(sd(resi),3), " on ", dof, " degrees of freedom", sep = ""))
                        }

                      )
)

#'my_print function
#'Our own print function, because we can't call the default function inside RC object
#'@examples print("This is an example!")
#'@param x is the input.

my_print = function(x) {
    print(x)
}

#'write_star function
#'generate the start base on p_value, follow the rule of lm function
#'@param p_value the input
write_star = function(p_value) {
  if (p_value > 0.1)
    output <- ""
  else if (p_value > 0.05)
    output <- "."
  else if (p_value > 0.01)
    output <- "*"
  if (p_value > 0.001)
    output <- "**"
  else  output <- "***"
  return(output)
}

#How to use:
#1. Run this file (both linreg object and the my_print, write_star function)
#2. mod_object <- linreg(Petal.Length~Species, data = iris)
#3. mod_object$print()
#4. mod_object$plot()
#5. mod_object$summary()

#a <- linreg$new(Petal.Length~Sepal.Width+Sepal.Length, data = iris)

This code is not the latest version - Update when fails are removed !



Philhoels/Lab4AdvR documentation built on May 23, 2019, 6:02 p.m.