knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

This package is an implementation of linear regression using linear algebra. Within this vignette, the example dataset trees is used.

Usage of the package

The package includes one function called linreg(formula, data) which returns a Reference Class object.

library(ggplot2)
linreg <- setRefClass("linreg",
                      fields = list(formula="formula",
                                    data="data.frame",
                                    m_X = "matrix",
                                    m_Y="matrix",
                                    Xt="matrix",
                                    XtX="matrix",
                                    betaestimates = "matrix",
                                    yfit="matrix",
                                    residual="matrix",
                                    nparameters = "integer",
                                    dof="integer",
                                    residualt = "matrix",
                                    residualvariance="numeric",
                                    residualstd = "numeric",
                                    betavariance = "matrix",
                                    bb = "numeric",
                                    tvalues="matrix",
                                    pvalues= "matrix",
                                    standardizedresiduals ="matrix",
                                    sqrtstresiduals = "matrix",
                                    export_formula = "formula",
                                    export_data = "character"
                                    ),
                      methods = list(
                        initialize = function(formula, data){
                          #Independent and dependent variables
                          m_X <<- model.matrix(formula, data)
                          m_Y <<- as.matrix(data[all.vars(formula)[1]])
                          # Transpose matrix X, and multiply + solve
                          Xt <<- t(m_X)
                          XtX <<- solve(Xt %*% m_X)
                          betaestimates <<- XtX %*% Xt %*% m_Y
                          # Estimate y
                          yfit <<- m_X %*% betaestimates
                          # Estimate residuals
                          residual <<- m_Y - yfit
                          # Determine degrees of freedom
                          nparameters <<- length(betaestimates)
                          dof <<- length(m_Y) - nparameters
                          # Variances
                          residualt <<- t(residual)
                          residualvariance <<- as.numeric((residualt %*% residual) / dof)
                          residualstd <<- sqrt(residualvariance)
                          betavariance <<- residualvariance * XtX
                          bb <<- diag(betavariance)
                          # t-values
                          tvalues <<- betaestimates/sqrt(bb)
                          # p-values
                          pvalues <<- 2 * pt(abs(tvalues), dof, lower.tail = FALSE)
                          # Standardized residuals for summary
                          standardizedresiduals <<- residual / sd(residual)
                          sqrtstresiduals <<- sqrt(abs(standardizedresiduals))

                          # saving names
                          export_formula <<- formula
                          export_data <<- deparse(substitute(data))
                        },
                        print = function(){
                          cat(paste("linreg(formula = ", format(export_formula), ", data = ", export_data , ")\n\n ", sep = ""))
                          setNames(round(betaestimates[1:nrow(betaestimates)],3),rownames(betaestimates))
                        },
                        resid = function(){
                          return(as.vector(residual))
                        },
                        pred = function(){
                          return(yfit)
                        },
                        coef = function(){
                          vec <- as.vector(betaestimates)
                          names(vec) <- colnames(m_X)
                          return(vec)
                        },
                        plot = function(){
                          plot1 <- ggplot(data.frame(yfit, residual), aes(y=residual, x=yfit)) + geom_point(shape=21, size=3, colour="black", fill="white")
                          plot1 <- plot1 + theme_linedraw() + theme(panel.grid.major = element_blank(),
                                                                            panel.grid.minor = element_blank(),
                                                                            plot.title = element_text(hjust = 0.5))
                          plot1 <- plot1 + stat_summary(fun.y=median, colour="red", geom="line", aes(group = 1))
                          plot1 <- plot1 + ggtitle("Residuals vs fitted") + xlab(paste("Fitted values \n lm(Petal.Length ~ Species)"))
                          plot2 <- ggplot(data.frame(yfit, sqrtstresiduals), aes(y=sqrtstresiduals, x=yfit)) + geom_point(alpha = 0.6, shape=21, size=3, colour="black", fill="white")
                          plot2 <- plot2 + theme_linedraw() + theme(panel.grid.major = element_blank(),
                                                                                 panel.grid.minor = element_blank(),
                                                                                 plot.title = element_text(hjust = 0.5))
                          plot2 <- plot2 + stat_summary(fun.y=median, colour="red", geom="line", aes(group = 1))
                          plot2 <- plot2 + ggtitle("Scale-Location") + xlab(paste("Fitted values \n lm(Petal.Length ~ Species)"))
                          plot2 <- plot2 + scale_x_continuous(breaks = seq(0.0, 1.5, by= 0.5))

                          plotlist <- list(plot1, plot2)
                          return(plotlist)
                        },
                        summary = function(){
                          "Prints the summary of linear regression model."
                          cat(paste("linreg(formula = ", format(export_formula), ", data = ", export_data, ") :\n\n ", sep = ""))
                          x <- setNames(as.data.frame(cbind(betaestimates,as.matrix(sqrt(bb)),tvalues, formatC(pvalues, format = "e", digits = 2), p_star_cal(pvalues))), c("Coefficients","Standard error","t values", "p values", ""))
                          myPrint(x)
                          cat(paste("\n\nResidual standard error: ", residualstd, " on ", dof, " degrees of freedom: ", sep = ""))
                        }
                      ))
myPrint = function(x, stripoff = FALSE) {
    print(x)
}
p_star_cal = function(p_value) {
  x <- ifelse(p_value > 0.1, " ",
              (ifelse(p_value > 0.05, " . ",
                      (ifelse(p_value > 0.01, "*",
                              (ifelse(p_value > 0.001, "**","***")))))))
  return(x)
}
linregObject = linreg(Volume ~ ., trees)

To work with this object, several methods can be used:

print()

Returns the coefficients of the linear regression model.

linregObject$print()

resid()

Returns the vector of all resisduals.

head(linregObject$resid())

pred()

Returns the predicted values.

head(linregObject$pred())

coef()

Returns the coefficients as a named vector.

linregObject$coef()

plot()

Returns two plots related to the residuals.

linregObject$plot()

summary()

Returns a summary of the applied linregFunction.

linregObject$summary()


Thijsq/rlab4 documentation built on May 19, 2019, 8:55 p.m.