knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(myLM)

Installation:

#devtools::install_github("wjhlang/myLM")
library(myLM)

Basic usage of function myLM:

To explore the usage of myLM, we'll use the dataset iris. This dataset contains r nrow(data("iris")) observations and r ncol(data("iris")) columns. We'll also be using simulated data in the examples later on.

data("iris")
x = iris$Petal.Width
y = iris$Petal.Length
myLM(y~x)
# Equivalently

myLM(Petal.Length~Petal.Width, data = iris)

# This gives the exact same result as the original lm function
lm(Petal.Length~Petal.Width, data = iris)

Simulated dataset

y = x%*%rnorm(5,1,10) + rnorm(10000,0,0.5)
x = matrix(rnorm(50000), nrow=10000) 
simdata = as.data.frame(cbind(y,x))
colnames(simdata) = c("y", "x1", "x2", "x3", "x4", "x5")
data("simdata")

Usage of different parameters

Usage of weights

# Get random numbers from the uniform distribution as the weights
rand = runif(nrow(iris))
myLM(Petal.Length~Petal.Width,data = iris, weights = rand)

# Compare with the lm function
lm(Petal.Length~Petal.Width,data = iris, weights = rand)

# On simulated dataset
# Get random numbers from the uniform distribution as the weights
rand = runif(nrow(simdata))
myLM(y~x1+x2+x3+x4+x5, data = simdata, weights = rand)

# Compare with the lm function
lm(y~., data = simdata,weights = rand)

Usage of subset

myLM(Petal.Length~Petal.Width,data = iris, subset = "Petal.Width<2")

# Compare with the lm function
lm(Petal.Length~Petal.Width,data = iris, subset = Petal.Width<2)

# On simulated dataset
rand = runif(nrow(simdata))
myLM(y~x1+x2+x3+x4+x5, data = simdata, subset = "x5<3")

# Compare with the lm function
lm(y~., data = simdata,subset = x5<3)

Usage of method

head(myLM(Petal.Length~Petal.Width,data = iris, method = "model.frame"))

# Compare with the lm function
all.equal(as.matrix(myLM(Petal.Length~Petal.Width,data = iris, method = "model.frame")), as.matrix(lm(Petal.Length~Petal.Width,data = iris, method = "model.frame")))

# On simulated dataset
head(myLM(y~x1+x2+x3+x4+x5, data = simdata, method = "model.frame"))

# Compare with the lm function
comp1 = as.matrix(myLM(y~x1+x2+x3+x4+x5, data = simdata, method = "model.frame"))
colnames(comp1) = NULL
comp2 = as.matrix(lm(y~., data = simdata, method = "model.frame"))
colnames(comp2) = NULL
all.equal(comp1, comp2)

Usage of model: Slight optimization that outputs the interaction term as well.

head(myLM(Petal.Length~Petal.Width*Sepal.Width,data = iris, model = T)$model) # Same as myLM(Petal.Length~Petal.Width,data = iris)

# Compare with the lm function
head(lm(Petal.Length~Petal.Width*Sepal.Width,data = iris, model = T)$model)

# On simulated dataset
head(myLM(y~x1+x2+x3+x4+x5,data = simdata)$model)

# Compare with the lm function
all.equal(as.matrix(myLM(y~x1+x2+x3+x4+x5,data = simdata)$model), as.matrix(lm(y~.,data=simdata)$model))

Use of x, y

head(myLM(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$x)
head(myLM(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$y)

# Compare with the lm function
all.equal(as.matrix(myLM(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$x),as.matrix(lm(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$x))
all.equal(myLM(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$y,lm(Petal.Length~Petal.Width*Sepal.Width,data = iris, x = T, y = T)$y)

# On simulated dataset
all.equal(as.matrix(myLM(y~x1+x2+x3+x4+x5,data = simdata, x = T, y = T)$x),as.matrix(lm(y~.,data = simdata, x = T, y = T)$x))
all.equal(myLM(y~x1+x2+x3+x4+x5,data = simdata, x = T, y = T)$y,lm(y~.,data = simdata, x = T, y = T)$y)

Summary (benefites of the 'lm' class)

summary(myLM(Petal.Length~Petal.Width*Sepal.Width + log(Petal.Width),data = iris))
# Compare with the lm function
summary(lm(Petal.Length~Petal.Width*Sepal.Width + log(Petal.Width),data = iris))

# On simulated dataset
summary(myLM(y~x1+x2+x3+x4+x5,data = simdata))
# Compare with the lm function
summary(lm(y~.,data = simdata))

Diagnostic Plots (benefites of the 'lm' class)

par(mfrow=c(2,2))
par(mar=c(2,2,2,2))
plot(myLM(Petal.Length~Petal.Width*Sepal.Width + I(Sepal.Length)^2,data = iris))
# Compare with the lm function
par(mfrow=c(2,2))
par(mar=c(2,2,2,2))
plot(lm(Petal.Length~Petal.Width*Sepal.Width + I(Sepal.Length)^2,data = iris))

# On simulated dataset
par(mfrow=c(2,2))
par(mar=c(2,2,2,2))
plot(myLM(y~x1+x2+x3+x4+x5,data = simdata))
# Compare with the lm function
par(mfrow=c(2,2))
par(mar=c(2,2,2,2))
plot(lm(y~.,data = simdata))

Compare efficiency

formula = Petal.Length~Petal.Width*Sepal.Width + I(Sepal.Length)^2
bnch1 = bench::mark(myLM(formula,data = iris)$coefficients, lm(formula,data = iris)$coefficients)
bnch1[,1] = c("myLM()", "lm()")
bnch1

# On simulated dataset
bnch2 = bench::mark(myLM(y~x1+x2+x3+x4+x5,data = simdata)$coefficients, lm(y~.,data = simdata)$coefficients)
bnch2[,1] = c("myLM()", "lm()")
bnch2

The lack of efficiency might be due to that I'm trying to implement everything that's being outputted in the original function, which slows down the process a bit.



wjhlang/myLM documentation built on Dec. 23, 2021, 5:16 p.m.