knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(package.v1)
library(car)
library(bench)
library(microbenchmark)

Introduction

This package focuses on linear regression calculation. With it, you can do model fitting, prediction, general linear hypothesis test, and residual calculations. There are six main functions in this package, which are linear_model(), predict_mock(), GLH(), residuals(), dffit(), and Cooks().

Here, we use the built-in toy data set, Davis, from package car to demonstrate the usages of each function.

Toy Dataset

Load in data set Davis.

data <- Davis
head(data)

Fit model

Say we are interested in studying students' weight adjusted for sex, height, repwt. Theoretically, our model is: $Weight = \beta_0 + \beta_1sex + \beta_2height + \beta_3repwt + \epsilon$ with $\epsilon \sim N(0,\sigma^2)$

First, we use self-defined model 'linear_model'

mod <- linear_model(weight ~ repwt + height + sex, data = data)

Now, we compare it with original lm function

mod2 <- lm(weight ~ repwt + height + sex, data = data)
summary(mod2)

Correctness

Now compare two models using all.equal function.

# coefficients
all.equal(as.vector(mod$Coefficients), as.vector(mod2$coefficients))

# standard error
all.equal(as.vector(mod$s.e), as.vector(summary(mod2)$coefficients[,2]))

# t value
all.equal(as.vector(mod$t_value), as.vector(summary(mod2)$coefficients[,3]))

# p value
all.equal(as.vector(mod$p_value), as.vector(summary(mod2)$coefficients[,4]))

# r square
all.equal(mod$R2[[1]], summary(mod2)$r.squared)

# adjusted r square
all.equal(mod$R2[[2]], summary(mod2)$adj.r.squared)

Efficiency

compare =  bench::mark(as.vector(linear_model(weight ~ repwt + height + sex, data = data)$Coefficients), as.vector(lm(weight ~ repwt + height + sex, data = data)$coefficients))
res <- microbenchmark::microbenchmark(as.vector(linear_model(weight ~ repwt + height + sex, data = data)$Coefficients), as.vector(lm(weight ~ repwt + height + sex, data = data)$coefficients))
print(compare)
plot(res)

Prediction

I manually generate 5 new observations.

newobs <- data.frame("repwt" = c(50, 70, 70, 85, 63),
                     "height" = c(165, 189, 178, 150, 180),
                     "sex" = c('F', 'M', 'M', 'F', 'F'))

Again, I use self-defined function first

predict_mock(mod, newobs)

The results given by R function:

predict(mod2, newobs)

Correctness

all.equal(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs)))

Efficiency

bench::mark(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs)))
plot(microbenchmark::microbenchmark(as.vector(predict_mock(mod, newobs)), as.vector(predict(mod2, newobs))))

General Linear Hypothesis test

I will test following hypotheses

$H_0: \beta_{repwt} + 2*\beta_{weight} = 0 \space and \space \beta_{sex} = 0$

$H_1: not \space H_0$

#Corresponding T matrix
(T = matrix(c(0,0,1,0,2,0,0,1), nrow = 2))

Results provided by self-defined function

GLH(mod, T)

Results from linearHypothesis function in package car.

linearHypothesis(mod2, T)

Correctness

all.equal(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5])

Efficiency

bench::mark(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5])
plot(microbenchmark::microbenchmark(GLH(mod, T)[[1]], linearHypothesis(mod2, T)[2,5]))

Residual Calculation

The results of implemented function are presented first.

res <- residuals(mod)
head(res)

Correctness

# unstandardized residuals
all.equal(as.vector(res$residuals), as.vector(mod2$residuals))

# standardized residuals
all.equal(as.vector(res$standardized.residuals), as.vector(mod2$residuals/summary(mod2)$sigma))

# internal
all.equal(as.vector(res$internal.studentize), as.vector(rstandard(mod2)))

# external
all.equal(as.vector(res$external.studentize), as.vector(rstudent(mod2)))

Efficiency

origin <- function(mod) {
  results <- data.frame("residuals" = as.vector(mod$residuals),
                        "standardized.residuals" = as.vector(mod$residuals/summary(mod)$sigma),
                        "internal.studentize" = as.vector(rstandard(mod)),
                        "external.studentize" = as.vector(rstudent(mod)))

  return(results)
}
bench::mark(residuals(mod), origin(mod2))
plot(microbenchmark::microbenchmark(residuals(mod), origin(mod2)))

DFFIT Calculation

The implemented function's results are presented first

dffit(mod)[1:10]

Below are results of function dffits in stats package.

stats::dffits(mod2)[1:10]

Correctness

all.equal(as.vector(dffit(mod)), as.vector(dffits(mod2)))

Efficiency

bench::mark(as.vector(dffit(mod)), as.vector(dffits(mod2)))
plot(microbenchmark::microbenchmark(as.vector(dffit(mod)), as.vector(dffits(mod2))))

Cook's distance

Still self-defined function are present first

Cooks(mod)[1:10]
stats::cooks.distance(mod2)[1:10]

Correctness

all.equal(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2)))

Efficiency

bench::mark(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2)))
plot(microbenchmark::microbenchmark(as.vector(Cooks(mod)), as.vector(cooks.distance(mod2))))


YuxuanChen0824/R_package documentation built on Dec. 18, 2021, 8:24 p.m.