In pedism/LR: Linear Regression using RC class

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

linreg <- setRefClass(Class = "linreg",


                      fields = list(formula="formula", data="data.frame", regco="matrix", yf="matrix", e="matrix", dfreedom="numeric", 
                                    Sigma_square="numeric", Var_Beta="matrix", t_Beta="matrix", pvalue="matrix",
                                    parse="character", stand_res="matrix",variance="numeric"),



                      methods = list(

                        initialize =function (formula,data)
                        {
                          c<-colnames(data)
                          d<-all.vars(formula)
                          stopifnot(d %in% c)
                          stopifnot (is.data.frame(data))
                          formula <<- formula
                          data <<- data
                          X <- model.matrix(formula,data)
                          dep_y <- all.vars(formula)[1]
                          y <- (data[,dep_y])
                          parse <<- deparse(substitute(data))
                          #Regressions coefficients
                          regco <<- solve((t(X)%*%X))%*%t(X)%*%y
                          #X <- QR
                          #Beta <- solve(R)%*%t(Q)%*%y
                          #Fitted values
                          yf <<- X%*%regco
                          #Residuals
                          e <<- y-yf
                          #Degrees of freedom
                          dfreedom <<- nrow(X)-ncol(X)
                          #Residual variance
                          Sigma_square <<- as.numeric((t(e)%*%e) / dfreedom)
                          #Variance of regression coefficients
                          Var_Beta <<- Sigma_square * solve((t(X)%*%X))
                          #t-values for each coefficient
                          t_Beta <<- regco / sqrt(diag(Var_Beta))
                          #p values for reg coefficients
                          pvalue <<- pt(abs(t_Beta),dfreedom)
                          #variance value
                          variance <<- round(sqrt(Sigma_square),2)
                          #standardised residual for plot2
                          stand_res <<- sqrt(abs((e-mean(e)) / sqrt(Sigma_square)))

                        },
                        #Methods
                        #print(),plot(), resid(),pred(),coef(),summary()
                        print = function(){
                          regco

                        },


                        plot = function(){
                          library(ggplot2)
                          #plotting yf and e
                          data_frame1 <- data.frame(Fitted_values=yf,Residuals=e)
                          p1 <- ggplot(data_frame1,aes(Fitted_values,Residuals))+
                            geom_point()+geom_abline()+
                            ggtitle("Residuals vs Fitted")+
                            xlab("Fitted values lm(Petal.Length ~ Species)")+
                            ylab("Residuals")

                          data_frame2 <- data.frame(Fitted_values=yf,Residuals=stand_res)
                          p2 <- ggplot(data_frame2,aes(Fitted_values,Residuals))+
                            geom_point()+geom_abline()+
                            ggtitle("Scale-Location")+
                            xlab("Fitted values lm(Petal.Length ~ Species)")+
                            ylab(expression(bold(sqrt(bold("Standardized Residuals")))) )
                          return(list(p1, p2))
                        },

                        resid = function(){
                          cat("Returning vector of residuals e:", "\n")
                          return(as.vector(round(e,2)))
                        },

                        pred = function(){
                          cat("Returning predicted values yf:", "\n")
                          return(as.vector(round(yf,2)))
                        },

                        coef = function(){
                          cat("Returning coefficients as a vector:", "\n")
                          return(as.vector(round(Beta,2)))
                        },

                        summary = function(){

                          base::summary()


                        }



                      ))

data <- iris
linreg <- function (formula,data)
                        {
                          c<-colnames(data)
                          d<-all.vars(formula)
                          stopifnot(d %in% c)
                          stopifnot (is.data.frame(data))
                          formula <<- formula
                          data <<- data
                          X <- model.matrix(formula,data)
                          dep_y <- all.vars(formula)[1]
                          y <- (data[,dep_y])
                          parse <<- deparse(substitute(data))
                          #Regressions coefficients
                          regco <<- solve((t(X)%*%X))%*%t(X)%*%y
                          #X <- QR
                          #Beta <- solve(R)%*%t(Q)%*%y
                          #Fitted values
                          yf <<- X%*%regco
                          #Residuals
                          e <<- y-yf
                          #Degrees of freedom
                          dfreedom <<- nrow(X)-ncol(X)
                          #Residual variance
                          Sigma_square <<- as.numeric((t(e)%*%e) / dfreedom)
                          #Variance of regression coefficients
                          Var_Beta <<- Sigma_square * solve((t(X)%*%X))
                          #t-values for each coefficient
                          t_Beta <<- regco / sqrt(diag(Var_Beta))
                          #p values for reg coefficients
                          pvalue <<- pt(abs(t_Beta),dfreedom)
                          #variance value
                          variance <<- round(sqrt(Sigma_square),2)
                          #standardised residual for plot2
                          stand_res <<- sqrt(abs((e-mean(e)) / sqrt(Sigma_square)))

                        }

This vignette summarizes functions of LR package and gives examples on how to use them.

DATA

To explore the basic functions of LR, we'll use iris.This is perhaps the best known database to be found in the pattern recognition literature. The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. you can see first 10 rows below:

head(iris,10)

Included functions

LR has a vary of functions to provide analysis on data.

*print()   prints out the coefficients and coefficient names.
*plot()    plots  two plots using ggplot2
*resid()   returns the vector of residuals e.
*pred()    returns the predicted values ^y.
*coef()    returns the coefficients as a named vector.
*summary() returns a similar printout as printed for lm objects

Print()

prints out the coefficients and coefficient names, similar as done by the lm class.

data(iris)
mod_object <- lm(Petal.Length~Species, data = iris)
print(mod_object)

Plot()

This function plots two plots using ggplot2 shown below:

 library(ggplot2)
data <- iris
formula = Petal.Length ~ Species
                          c<-colnames(data)
                          d<-all.vars(formula)
                          stopifnot(d %in% c)
                          stopifnot (is.data.frame(data))

                          X <- model.matrix(formula,data)
                          dep_y <- all.vars(formula)[1]
                          y <- (data[,dep_y])
                          parse<- deparse(substitute(data))
                          #Regressions coefficients
                          regco <<- solve((t(X)%*%X))%*%t(X)%*%y
                          #X <- QR
                          #Beta <- solve(R)%*%t(Q)%*%y
                          #Fitted values
                          yf <<- X%*%regco
                          #Residuals
                          e <<- y-yf
                          #Degrees of freedom
                          dfreedom <<- nrow(X)-ncol(X)
                          #Residual variance
                          Sigma_square <<- as.numeric((t(e)%*%e) / dfreedom)
                          #Variance of regression coefficients
                          Var_Beta <<- Sigma_square * solve((t(X)%*%X))
                          #t-values for each coefficient
                          t_Beta <<- regco / sqrt(diag(Var_Beta))
                          #p values for reg coefficients
                          pvalue <<- pt(abs(t_Beta),dfreedom)
                          #variance value
                          variance <<- round(sqrt(Sigma_square),2)
                          #standardised residual for plot2
                          stand_res <<- sqrt(abs((e-mean(e)) / sqrt(Sigma_square)))

                          #plotting yf and e
                          data_frame1 <- data.frame(Fitted_values=yf,Residuals=e)
                          p1 <- ggplot(data_frame1,aes(Fitted_values,Residuals))+
                            geom_point()+geom_abline()+
                            ggtitle("Residuals vs Fitted")+
                            xlab("Fitted values lm(Petal.Length ~ Species)")+
                            ylab("Residuals")

                          data_frame2 <- data.frame(Fitted_values=yf,Residuals=stand_res)
                          p2 <- ggplot(data_frame2,aes(Fitted_values,Residuals))+
                            geom_point()+geom_abline()+
                            ggtitle("Scale-Location")+
                            xlab("Fitted values lm(Petal.Length ~ Species)")+
                            ylab(expression(bold(sqrt(bold("Standardized Residuals")))) )
                          return(list(p1, p2))

resid()

this function returns the vector of residuals e.

resid(cat("Returning vector of residuals e:", "\n"))
                          return(as.vector(round(e,2)))

pred()

returns the predicted values of y_hat.

             pred = function(){
                          cat("Returning predicted values yf:", "\n")
                          return(as.vector(round(yf,2)))
                        }
                          return(as.vector(round(yf,2)))

coef()

This function returns the cofficients as a named vector

                        coef = function(){
                          cat("Returning coefficients as a vector:", "\n")
                          return(as.vector(round(regco,2)))
                        }
return(as.vector(round(regco,2)))

summary()

this function returns a similar printout as printed for lm objects, but you only need to present the coefficients with their standard error, t-value and p-value as well as the estimate of sigma and the degrees of freedom in the model.

pedism/LR documentation built on May 9, 2019, 5:57 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com