In jeksterslabds/jeksterslabRlinreg: Jekster's Lab Linear Regression Utilities

knitr::opts_chunk$set(
  error = TRUE,
  collapse = TRUE,
  comment = "#>",
  out.width = "100%"
)

# The Simple Linear Regression Model: The Population Regression Model Example {#simple-population-example}

library(testthat)
library(jeksterslabRlinreg)

varnames <- c("y", "x")
wages <- jeksterslabRdatarepo::wages
x <- wages$education
y <- wages$wages
n <- length(x)
obj <- lm(y ~ x)
beta <- unname(coef(obj))
names(beta) <- c("beta1", "beta2")
# covariance structure
beta1 <- beta[1]
beta2 <- beta[2]
sigma2x <- var(x)
sigma2y <- var(y)
# sigma^2 has some discrepancy
# sigma2epsilon <- summary(obj)$sigma^2
sigma2epsilon <- sigma2y - (beta2^2 * sigma2x)
# mean structure
mux <- mean(x)

x <- rnorm(n = n, mean = mux, sd = sqrt(sigma2x))
epsilon <- rnorm(n = n, mean = 0, sd = sqrt(sigma2epsilon))
y <- beta1 + beta2 * x + epsilon
obj <- lm(y ~ x)
beta <- unname(coef(obj))
names(beta) <- c("beta1", "beta2")
# covariance structure
beta1 <- beta[1]
beta2 <- beta[2]
sigma2x <- var(x)
sigma2y <- var(y)
sigmayx <- cov(y, x)
ryx <- cor(y, x)
Sigmatheta <- matrix(
  data = c(
    sigma2y,
    sigmayx,
    sigmayx,
    sigma2x
  ),
  nrow = 2,
  ncol = 2
)
# sigma^2 has some discrepancy
# sigma2epsilon <- summary(obj)$sigma^2
sigma2epsilon <- sigma2y - (beta2^2 * sigma2x)
# mean structure
mux <- mean(x)
muy <- mean(y)
mutheta <- matrix(
  data = c(muy, mux),
  ncol = 1
)
X <- cbind(
  constant = 1,
  education = x
)
y <- matrix(
  data = y,
  ncol = 1
)
colnames(y) <- "wages"

The jeksterslabRlinreg package has functions to derive expectations for regression models.

jeksterslabRlinreg::mutheta() returns the expected values, that is, the model-implied mean vector.
jeksterslabRlinreg::Sigmatheta() returns the covariance expectations, that is, the model-implied variance-covariance matrix.

In this hypothetical example, we assume that we have population data and we are interested in the association between wages and education. The regressor variable is years of education. The regressand variable is hourly wage in US dollars. The intercept is the predicted wage of an employee with 0 years of education. The slope is the increase in hourly wage in US dollars for one year increase in education.

plot(x = x, y = y, xlab = "Years of Education", ylab = "Hourly Wages in US Dollars")
abline(obj, col = "red")

The the following vectors represent the parameters of the simple linear regression model.

\begin{equation} \boldsymbol{\theta}_{\text{mean structure}} = \begin{bmatrix} \beta_1 \ \mu_x \end{bmatrix} \end{equation}

\begin{equation} \boldsymbol{\theta}{\text{covariance structure}} = \begin{bmatrix} \beta_2 \ \sigma{\varepsilon}^{2} \ \sigma_{x}^{2} \end{bmatrix} \end{equation}

Variable <- c(
  "`beta1`",
  "`mux`"
)
Description <- c(
  "Intercept",
  "Mean of $x$"
)
Notation <- c(
  "$\\beta_1$",
  "$\\mu_x$"
)
Value <- c(
  beta1,
  mux
)
knitr::kable(
  x = data.frame(
    Variable,
    Description,
    Notation,
    Value
  ),
  caption = "$\\boldsymbol{\\theta}_{\\text{mean structure}}$",
  row.names = FALSE
)

Variable <- c(
  "`beta2`",
  "`sigma2epsilon`",
  "`sigma2x`"
)
Description <- c(
  "Slope",
  "Variance of $\\varepsilon$",
  "Variance of $x$"
)
Notation <- c(
  "$\\beta_2$",
  "$\\sigma_{\\varepsilon}^{2}$",
  "$\\sigma_{x}^{2}$"
)
Value <- c(
  beta2,
  sigma2epsilon,
  sigma2x
)
knitr::kable(
  x = data.frame(
    Variable,
    Description,
    Notation,
    Value
  ),
  caption = "$\\boldsymbol{\\theta}_{\\text{covariance structure}}$",
  row.names = FALSE
)

There are times when the intercept does not lend to a meaningful interpretation. A negative wage corresponding to zero years of education does not really make a lot of sense. The value that is interesting in this case is the slope. The slope represents r beta2 US dollar increase in hourly wages given 1 additional year of education.

The expected values are derived using jeksterslabRlinreg::mutheta().

result_mutheta <- jeksterslabRlinreg::mutheta(
  beta = beta,
  muX = mux
)

colnames(result_mutheta) <- "$\\mu$"
rownames(result_mutheta) <- varnames
knitr::kable(
  x = result_mutheta,
  caption = "Expected Values"
)

The covariance expectations are derive using jeksterslabRlinreg::Sigmatheta().

result_Sigmatheta <- jeksterslabRlinreg::Sigmatheta(
  slopes = beta2,
  sigma2epsilon = sigma2epsilon,
  SigmaX = sigma2x
)

colnames(result_Sigmatheta) <- varnames
rownames(result_Sigmatheta) <- varnames
knitr::kable(
  x = result_Sigmatheta,
  caption = "Covariance Expectations"
)

The inverse, that is, deriving the parameters from the expectations can also be done using functions from the jeksterslabRlinreg package.

The regression coefficients can be derived using jeksterslabRlinreg::intercept(), and jeksterslabRlinreg::slopes() from the means and the covariances.

result_slopes <- jeksterslabRlinreg::.slopes(
  SigmaX = sigma2x,
  sigmayX = sigmayx
)
result_slopes
result_intercept <- jeksterslabRlinreg::.intercept(
  slopes = result_slopes,
  muy = muy,
  muX = mux
)
result_intercept

From raw data

result_slopesfromrawdata <- jeksterslabRlinreg::slopes(
  X = X,
  y = y
)
result_slopesfromrawdata
result_interceptfromrawdata <- jeksterslabRlinreg::intercept(
  X = X,
  y = y
)
result_interceptfromrawdata

Standardized slopes can also be obtained using jeksterslabRlinreg::slopesprime()

result_slopesprime <- jeksterslabRlinreg::.slopesprime(
  RX = sqrt(sigma2x),
  ryX = ryx
)
result_slopesprime

From raw data

result_slopesprimefromrawdata <- jeksterslabRlinreg::slopesprime(
  X = X,
  y = y
)
result_slopesprimefromrawdata

The error variance can also be derived with the available information using jeksterslabRlinreg::sigma2() .

result_sigma2epsilon <- sigma2epsilon(
  slopes = beta2,
  sigma2y = sigma2y,
  sigmayX = sigmayx,
  SigmaX = sigma2x
)
result_sigma2epsilon

context("Test simple-regression-ram")
test_that("result_mutheta", {
  for (i in 1:nrow(result_mutheta)) {
    expect_equivalent(
      result_mutheta[i, 1],
      mutheta[i, 1]
    )
  }
})
test_that("result_Sigmatheta", {
  for (i in 1:nrow(result_Sigmatheta)) {
    for (j in 1:ncol(result_Sigmatheta)) {
      expect_equivalent(
        result_Sigmatheta[i, j],
        Sigmatheta[i, j]
      )
    }
  }
})
test_that("results_beta", {
  results_beta <- c(
    result_intercept,
    result_slopes
  )
  for (i in 1:length(results_beta)) {
    expect_equivalent(
      results_beta[i],
      beta[i]
    )
  }
})
test_that("results_betafromrawdata", {
  results_betafromrawdata <- c(
    result_interceptfromrawdata,
    result_slopesfromrawdata
  )
  for (i in 1:length(results_betafromrawdata)) {
    expect_equivalent(
      results_betafromrawdata[i],
      beta[i]
    )
  }
})