## Chunk options for the rendered document: collapse source and output into
## one block and prefix every output line with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## Packages used by the examples and benchmarks below.
library(bench)
library(tidyr)
library(ggbeeswarm)
library(ggplot2)
library(MLRnewbie)
## Load the mtcars data set under a short name.
mc <- mtcars

## Multiple linear regression with intercept.
## "cyl", "disp", and "hp" from mtcars are the predictors (X); "mpg" is the
## response (Y).
model_car <- linearRegression(
  data = mc,
  predictors = c("cyl", "disp", "hp"),
  response = "mpg"
)
## First rows of the residual table. Its columns, from left to right, are
## Residual, Std_Residual, and Studentized_Residual.
head(model_residuals(model_car))

## All three sums of squares of the linear regression model.
ssmodel(model_car)
## Reference fit with lm() on the same response and predictors.
model_lm <- lm(mpg ~ cyl + disp + hp, data = mc)
summary(model_lm)

## ANOVA table of the reference fit.
anova(model_lm)
### Simulate data
set.seed(777)

## Number of simulated observations.
n_obs <- 50000

## Generate the independent variables and the error term.
x1 <- rnorm(n_obs, 100, 16)
x2 <- rnorm(n_obs, 200, 36)
error <- rnorm(n_obs, 0, 25)

## Generate the dependent variable (b0 = 200, b1 = -5, b2 = 10).
y1 <- 200 - (5 * x1) + (10 * x2) + error

## Collect response and predictors in one data frame with columns y1, x1, x2.
testdata <- data.frame(y1, x1, x2)
## Fit the simulated data with the regression function in MLRnewbie.
model_nb <- linearRegression(
  data = testdata,
  predictors = c("x1", "x2"),
  response = "y1"
)
cat("")

## Fit the same model with the base regression function and summarise it.
model_base <- lm(y1 ~ x1 + x2, data = testdata)
summary(model_base)
## Correctness: the coefficient estimates of both fits should agree.
## Because the two implementations accumulate rounding error differently,
## the estimates are compared after rounding to 10 digits.
all.equal(
  round(model_nb[[1]][, 1], digits = 10),
  round(model_base$coefficients, digits = 10)
)
cat("")

## Efficiency: time the two fitting routines themselves rather than the
## rounding of coefficients that were already computed. The two calls return
## different objects, so bench::mark()'s result-equality check is turned off
## with check = FALSE.
bm <- bench::mark(
  MLRnewbie = linearRegression(
    data = testdata,
    predictors = c("x1", "x2"),
    response = "y1"
  ),
  base_lm = lm(y1 ~ x1 + x2, data = testdata),
  check = FALSE
)
summary(bm)
plot(bm)
Note that there is no equivalent function in base R, so it is meaningless to compare efficiency. In addition, base R does not have a standard residual function, and we know from the previous code that the model is correctly established (same MSE). So the standard residuals should be correct as long as the residuals are the same.
## Correctness check: column 3 of model_residuals() against rstandard(), and
## column 1 against the residuals of the lm() fit, each rounded to 10 digits.
all.equal(
  round(model_residuals(model_car)[, 3], digits = 10),
  round(rstandard(model_lm), digits = 10)
)
all.equal(
  round(model_residuals(model_car)[, 1], digits = 10),
  round(residuals(model_lm), digits = 10)
)

## Benchmark of the two ways to obtain the column-3 residuals.
bm2 <- bench::mark(
  round(model_residuals(model_car)[, 3], digits = 10),
  round(rstandard(model_lm), digits = 10)
)
summary(bm2)
plot(bm2)
There is no sum-of-squares function in base R, and my function only reads off numbers from the model. So there is no point in checking efficiency, since its complexity is O(1).
## Correctness check
## Build the ANOVA table once, display it, and reuse it below.
aov_tab <- anova(model_lm)
aov_tab

## The residual sum of squares sits in the last row of the table; every
## earlier row belongs to a model term. Indexing by position from the end
## keeps this valid for any number of predictors.
ss <- aov_tab[["Sum Sq"]]
resid_row <- length(ss)

## Same three quantities, in the same order, as the original comparison:
## sum of the model-term rows, the residual row, and their total.
anr <- c(
  sum(ss[-resid_row]),
  ss[resid_row],
  ss[resid_row] + sum(ss[-resid_row])
)
ssmr <- unname(ssmodel(model_car)[, 1])
all.equal(anr, ssmr)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.