knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(package.v1) library(car) library(bench) library(microbenchmark)
This package focuses on linear regression calculation. With it, you can do model fitting, prediction, general linear hypothesis test, and residual calculations. There are six main functions in this package, which are linear_model()
, predict_mock()
, GLH()
, residuals()
, dffit()
, and Cooks()
.
Here, we use the built-in toy data set, Davis
, from package car
to demonstrate the usages of each function.
Load in data set Davis
.
data <- Davis head(data)
Say we are interested in studying students' weight adjusted for sex, height, repwt. Theoretically, our model is: $Weight = \beta_0 + \beta_1sex + \beta_2height + \beta_3repwt + \epsilon$ with $\epsilon \sim N(0,\sigma^2)$
First, we use self-defined model 'linear_model'
mod <- linear_model(weight ~ repwt + height + sex, data = data)
Now, we compare it with original lm
function
mod2 <- lm(weight ~ repwt + height + sex, data = data) summary(mod2)
Now compare two models using all.equal
function.
# coefficients all.equal(as.vector(mod$Coefficients), as.vector(mod2$coefficients)) # standard error all.equal(as.vector(mod$s.e), as.vector(summary(mod2)$coefficients[,2])) # t value all.equal(as.vector(mod$t_value), as.vector(summary(mod2)$coefficients[,3])) # p value all.equal(as.vector(mod$p_value), as.vector(summary(mod2)$coefficients[,4])) # r square all.equal(mod$R2[[1]], summary(mod2)$r.squared) # adjusted r square all.equal(mod$R2[[2]], summary(mod2)$adj.r.squared)
compare = bench::mark(as.vector(linear_model(weight ~ repwt + height + sex, data = data)$Coefficients), as.vector(lm(weight ~ repwt + height + sex, data = data)$coefficients)) res <- microbenchmark::microbenchmark(as.vector(linear_model(weight ~ repwt + height + sex, data = data)$Coefficients), as.vector(lm(weight ~ repwt + height + sex, data = data)$coefficients))
print(compare) plot(res)
I manually generate 5 new observations.
newobs <- data.frame("repwt" = c(50, 70, 70, 85, 63), "height" = c(165, 189, 178, 150, 180), "sex" = c('F', 'M', 'M', 'F', 'F'))
Again, I use self-defined function first
predict_mock(mod, newobs)
The results given by R function:
predict(mod2, newobs)
all.equal(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs)))
bench::mark(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs))) plot(microbenchmark::microbenchmark(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs))))
I will test following hypotheses
$H_0: \beta_{repwt} + 2*\beta_{weight} = 0 \space and \space \beta_{sex} = 0$
$H_1: not \space H_0$
#Corresponding T matrix (T = matrix(c(0,0,1,0,2,0,0,1), nrow = 2))
Results provided by self-defined function
GLH(mod, T)
Results from linearHypothesis
function in package car
.
linearHypothesis(mod2, T)
all.equal(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5])
bench::mark(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5]) plot(microbenchmark::microbenchmark(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5]))
The results of implemented function are presented first.
res <- residuals(mod) head(res)
# unstandardized residuals all.equal(as.vector(res$residuals), as.vector(mod2$residuals)) # standardized residuals all.equal(as.vector(res$standardized.residuals), as.vector(mod2$residuals/summary(mod2)$sigma)) # internal all.equal(as.vector(res$internal.studentize), as.vector(rstandard(mod2))) # external all.equal(as.vector(res$external.studentize), as.vector(rstudent(mod2)))
origin <- function(mod) { results <- data.frame("residuals" = as.vector(mod$residuals), "standardized.residuals" = as.vector(mod$residuals/summary(mod)$sigma), "internal.studentize" = as.vector(rstandard(mod)), "external.studentize" = as.vector(rstudent(mod))) return(results) }
bench::mark(residuals(mod), origin(mod2)) plot(microbenchmark::microbenchmark(residuals(mod), origin(mod2)))
The implemented function's results are presented first
dffit(mod)[1:10]
Below are results of function dffits
in stats
package.
stats::dffits(mod2)[1:10]
all.equal(as.vector(dffit(mod)), as.vector(dffits(mod2)))
bench::mark(as.vector(dffit(mod)), as.vector(dffits(mod2))) plot(microbenchmark::microbenchmark(as.vector(dffit(mod)), as.vector(dffits(mod2))))
Still self-defined function are present first
Cooks(mod)[1:10]
stats::cooks.distance(mod2)[1:10]
all.equal(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2)))
bench::mark(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2))) plot(microbenchmark::microbenchmark(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2))))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.