knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
This package is an implementation of linear regression using linear algebra.
Within this vignette, the example dataset trees
is used.
The package includes one function called linreg(formula, data)
which returns a Reference Class object.
library(ggplot2) linreg <- setRefClass("linreg", fields = list(formula="formula", data="data.frame", m_X = "matrix", m_Y="matrix", Xt="matrix", XtX="matrix", betaestimates = "matrix", yfit="matrix", residual="matrix", nparameters = "integer", dof="integer", residualt = "matrix", residualvariance="numeric", residualstd = "numeric", betavariance = "matrix", bb = "numeric", tvalues="matrix", pvalues= "matrix", standardizedresiduals ="matrix", sqrtstresiduals = "matrix", export_formula = "formula", export_data = "character" ), methods = list( initialize = function(formula, data){ #Independent and dependent variables m_X <<- model.matrix(formula, data) m_Y <<- as.matrix(data[all.vars(formula)[1]]) # Transpose matrix X, and multiply + solve Xt <<- t(m_X) XtX <<- solve(Xt %*% m_X) betaestimates <<- XtX %*% Xt %*% m_Y # Estimate y yfit <<- m_X %*% betaestimates # Estimate residuals residual <<- m_Y - yfit # Determine degrees of freedom nparameters <<- length(betaestimates) dof <<- length(m_Y) - nparameters # Variances residualt <<- t(residual) residualvariance <<- as.numeric((residualt %*% residual) / dof) residualstd <<- sqrt(residualvariance) betavariance <<- residualvariance * XtX bb <<- diag(betavariance) # t-values tvalues <<- betaestimates/sqrt(bb) # p-values pvalues <<- 2 * pt(abs(tvalues), dof, lower.tail = FALSE) # Standardized residuals for summary standardizedresiduals <<- residual / sd(residual) sqrtstresiduals <<- sqrt(abs(standardizedresiduals)) # saving names export_formula <<- formula export_data <<- deparse(substitute(data)) }, print = function(){ cat(paste("linreg(formula = ", format(export_formula), ", data = ", export_data , ")\n\n ", sep = "")) setNames(round(betaestimates[1:nrow(betaestimates)],3),rownames(betaestimates)) }, resid = function(){ return(as.vector(residual)) }, pred = function(){ return(yfit) }, coef = function(){ vec <- as.vector(betaestimates) names(vec) <- colnames(m_X) return(vec) }, plot = function(){ plot1 <- ggplot(data.frame(yfit, residual), aes(y=residual, x=yfit)) + geom_point(shape=21, size=3, colour="black", fill="white") plot1 <- plot1 + theme_linedraw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.title = element_text(hjust = 0.5)) plot1 <- plot1 + stat_summary(fun.y=median, colour="red", geom="line", aes(group = 1)) plot1 <- plot1 + ggtitle("Residuals vs fitted") + xlab(paste("Fitted values \n lm(Petal.Length ~ Species)")) plot2 <- ggplot(data.frame(yfit, sqrtstresiduals), aes(y=sqrtstresiduals, x=yfit)) + geom_point(alpha = 0.6, shape=21, size=3, colour="black", fill="white") plot2 <- plot2 + theme_linedraw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.title = element_text(hjust = 0.5)) plot2 <- plot2 + stat_summary(fun.y=median, colour="red", geom="line", aes(group = 1)) plot2 <- plot2 + ggtitle("Scale-Location") + xlab(paste("Fitted values \n lm(Petal.Length ~ Species)")) plot2 <- plot2 + scale_x_continuous(breaks = seq(0.0, 1.5, by= 0.5)) plotlist <- list(plot1, plot2) return(plotlist) }, summary = function(){ "Prints the summary of linear regression model." cat(paste("linreg(formula = ", format(export_formula), ", data = ", export_data, ") :\n\n ", sep = "")) x <- setNames(as.data.frame(cbind(betaestimates,as.matrix(sqrt(bb)),tvalues, formatC(pvalues, format = "e", digits = 2), p_star_cal(pvalues))), c("Coefficients","Standard error","t values", "p values", "")) myPrint(x) cat(paste("\n\nResidual standard error: ", residualstd, " on ", dof, " degrees of freedom: ", sep = "")) } )) myPrint = function(x, stripoff = FALSE) { print(x) } p_star_cal = function(p_value) { x <- ifelse(p_value > 0.1, " ", (ifelse(p_value > 0.05, " . ", (ifelse(p_value > 0.01, "*", (ifelse(p_value > 0.001, "**","***"))))))) return(x) }
linregObject = linreg(Volume ~ ., trees)
To work with this object, several methods can be used:
print()
resid()
pred()
coef()
plot()
summary()
Returns the coefficients of the linear regression model.
linregObject$print()
Returns the vector of all resisduals.
head(linregObject$resid())
Returns the predicted values.
head(linregObject$pred())
Returns the coefficients as a named vector.
linregObject$coef()
Returns two plots related to the residuals.
linregObject$plot()
Returns a summary of the applied linregFunction.
linregObject$summary()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.