tests/testthat/test.regressionCoefficientsAnalytical.R

# Setup -------------------------------------------------------------------
context("Script vs. beta_gen-calculated regression coefficients")

n <- 1e4
cum_prob_xw    <- list(1, 1, 1, c(.4, 1), c(.3, .8, 1))
cum_prob_yx1   <- list(1, 1)
cum_prob_yx    <- list(1, 1, 1)
cum_prob_yw1   <- list(1, c(.4, 1))
cum_prob_yw2   <- list(1, c(.3, .8, 1))
cum_prob_yw    <- list(1, c(.4, 1), c(.3, .8, 1))
cum_prob_yx1w1 <- list(1, 1, c(.4, 1))
cum_prob_yx1w2 <- list(1, 1, c(.3, .8, 1))
cum_prob_yx1w  <- list(1, 1, c(.4, 1), c(.3, .8, 1))
cum_prob_yxw1  <- list(1, 1, 1, c(.4, 1))
cum_prob_yxw2  <- list(1, 1, 1, c(.3, .8, 1))
cum_prob_yxw   <- list(1, 1, 1, c(.4, 1), c(.3, .8, 1))
cov_mx_yxw <- matrix(c(1, .1, .1, .2, .2,
                      .1, 1, .3, .4, .4,
                      .1, .3, 1, .4, .4,
                      .2, .4, .4, 1, .5,
                      .2, .4, .4, .5, 1), 5)
cov_mx_yx1   <- cov_mx_yxw[c(1, 2), c(1, 2)]
cov_mx_yx    <- cov_mx_yxw[c(1, 2, 3), c(1, 2, 3)]
cov_mx_yw1   <- cov_mx_yxw[c(1, 4), c(1, 4)]
cov_mx_yw2   <- cov_mx_yxw[c(1, 5), c(1, 5)]
cov_mx_yw    <- cov_mx_yxw[c(1, 4, 5), c(1, 4, 5)]
cov_mx_yx1w1 <- cov_mx_yxw[c(1, 2, 4), c(1, 2, 4)]
cov_mx_yx1w2 <- cov_mx_yxw[c(1, 2, 5), c(1, 2, 5)]
cov_mx_yx1w  <- cov_mx_yxw[c(1, 2, 4, 5), c(1, 2, 4, 5)]
cov_mx_yxw1  <- cov_mx_yxw[c(1, 2, 3, 4), c(1, 2, 3, 4)]
cov_mx_yxw2  <- cov_mx_yxw[c(1, 2, 3, 5), c(1, 2, 3, 5)]
mu_yx <- 0:2
sd_yx <- 1:3

# Generate data -----------------------------------------------------------
q_gen <- function(data, cov_mx, mean) {
  suppressMessages(questionnaire_gen(n, data, cov_matrix = cov_mx, theta = TRUE,
                                     family = "gaussian", full_output = TRUE,
                                     c_mean = mean))
}

df_yx1   <- q_gen(cum_prob_yx1, cov_mx_yx1, mu_yx[c(1, 2)])
df_yx    <- q_gen(cum_prob_yx, cov_mx_yx, mu_yx[c(1, 2, 3)])
df_yw1   <- q_gen(cum_prob_yw1, cov_mx_yw1, mu_yx[c(1)])
df_yw2   <- q_gen(cum_prob_yw2, cov_mx_yw2, mu_yx[c(1)])
df_yw    <- q_gen(cum_prob_yw, cov_mx_yw, mu_yx[c(1)])
df_yx1w1 <- q_gen(cum_prob_yx1w1, cov_mx_yx1w1, mu_yx[c(1, 2)])
df_yx1w2 <- q_gen(cum_prob_yx1w2, cov_mx_yx1w2, mu_yx[c(1, 2)])
df_yx1w  <- q_gen(cum_prob_yx1w, cov_mx_yx1w, mu_yx[c(1:2)])
df_yxw1  <- q_gen(cum_prob_yxw1, cov_mx_yxw1, mu_yx[c(1:3)])
df_yxw2  <- q_gen(cum_prob_yxw2, cov_mx_yxw2, mu_yx[c(1:3)])
df_yxw   <- q_gen(cum_prob_yxw, cov_mx_yxw, mu_yx[c(1:3)])

yx1   <- setNames(df_yx1$bg, c("subject", "y", "x1"))
yx    <- setNames(df_yx$bg, c("subject", "y", "x1", "x2"))
yw1   <- setNames(df_yw1$bg, c("subject", "y", "w1"))
yw2   <- setNames(df_yw2$bg, c("subject", "y", "w2"))
yw    <- setNames(df_yw$bg, c("subject", "y", "w1", "w2"))
yx1w1 <- setNames(df_yx1w1$bg, c("subject", "y", "x1", "w1"))
yx1w2 <- setNames(df_yx1w2$bg, c("subject", "y", "x1","w2"))
yx1w  <- setNames(df_yx1w$bg, c("subject", "y", "x1", "w1", "w2"))
yxw1  <- setNames(df_yxw1$bg, c("subject", "y", "x1", "x2", "w1"))
yxw2  <- setNames(df_yxw2$bg, c("subject", "y", "x1", "x2", "w2"))
yxw   <- setNames(df_yxw$bg, c("subject", "y", "x1", "x2", "w1", "w2"))

# Test data (to be removed) ------------------------------------------
mu_yxz  <- list(y = mu_yx[1], x1 = mu_yx[2], x2 = mu_yx[3], z1 = 0, z2 = 0)
var_yxz <- list(y = sd_yx[1] ^ 2, x1 = sd_yx[2] ^ 2, x2 = sd_yx[3] ^ 2, z1 = 1, z2 = 1)
sd_yxz  <- lapply(var_yxz, sqrt)
num_yx <- 3
cols_z <- 4:5

# Discrete variables generated from Z
cum_prob <- list(w1 = c(0, .4, 1), w2 = c(0, .3, .8, 1))  # != format from q_gen
names_w <- names(cum_prob)
mu_w <- lapply(cum_prob, diff)
mu_w_minus_1 <- unlist(sapply(mu_w, function(w) w[-1]))
var_w <- lapply(mu_w, function(p) p * (1 - p))
sd_w <- lapply(var_w, sqrt)

cov_yx <- .1
cov_yz <- .2
cov_xx <- .3
cov_xz <- .4
cov_zz <- .5
vcov_yxz <- cov_mx_yxw
dimnames(vcov_yxz) <- list(names(mu_yxz), names(mu_yxz))
q_z <- lapply(cum_prob, function(x) qnorm(x, mean = 0, sd = 1))  # Z ~ N(0, 1)

# Regressions -------------------------------------------------------------
reg_yx1   <- lm(y ~ x1               , yx1)
reg_yx    <- lm(y ~ x1 + x2          , yx)
reg_yw1   <- lm(y ~           w1     , yw1)
reg_yw2   <- lm(y ~                w2, yw2)
reg_yw    <- lm(y ~           w1 + w2, yw)
reg_yx1w1 <- lm(y ~ x1 +      w1     , yx1w1)
reg_yx1w2 <- lm(y ~ x1 +           w2, yx1w2)
reg_yx1w  <- lm(y ~ x1 +      w1 + w2, yx1w)
reg_yxw1  <- lm(y ~ x1 + x2 + w1     , yxw1)
reg_yxw2  <- lm(y ~ x1 + x2 +      w2, yxw2)
reg_yxw   <- lm(y ~ x1 + x2 + w1 + w2, yxw)

# Calculating elements of vcov_yxw ----------------------------------------
# Missing pieces: all covariances involving W, i.e., Cov(Y, W), Cov(X, W) and
# Cov(W, W). Calculations below:
exp_X_given_Y <- function(lo_lim, up_lim, mu_X = 0, cov_XY = .5, mu_Y = 0, sd_Y = 1) {
  a <- (lo_lim - mu_Y) / sd_Y
  b <- (up_lim - mu_Y) / sd_Y
  return(mu_X + (cov_XY / sd_Y) * (dnorm(a) - dnorm(b)) / (pnorm(b) - pnorm(a)))
}

# Calculates E(XW) or E(YW) for covariance later on
exp_AB2 <- function(names_b, mu_a, mu_b, cov_ab, q, ...) {
  exp_ab <- list()
  for (b in names_b) {
    exp_ab[[b]] <- vector()
    for (i in seq_along(mu_b[[b]])) {
      exp_ab[[b]][i] <- mu_b[[b]][i] *
        exp_X_given_Y(q[[b]][i], q[[b]][i + 1], mu_a, cov_ab, ...)
    }
  }
  return(exp_ab)
}

exp_yw <- exp_AB2(names_w, mu_yxz$y, mu_w, cov_yz, q_z)
exp_x1w <- exp_AB2(names_w, mu_yxz$x1, mu_w, cov_xz, q_z, sd_Y = sd_yxz$y)
exp_x2w <- exp_AB2(names_w, mu_yxz$x2, mu_w, cov_xz, q_z, sd_Y = sd_yxz$y)

cov_AB <- function(names_b, exp_ab, mu_a, mu_b) {
  covar <- sapply(names_b, function(b) (exp_ab[[b]] - mu_a * mu_b[[b]])[-1])
  return(covar)
}

# [-1] below removes first category
# From Cov(X, W) = E(XW) - E(X)E(W)
cov_yw <- cov_AB(names_w, exp_yw, mu_yxz$y, mu_w)
cov_x1w <- cov_AB(names_w, exp_x1w, mu_yxz$x1, mu_w)
cov_x2w <- cov_AB(names_w, exp_x2w, mu_yxz$x2, mu_w)

# Covariance matrix of the categories of W
create_vcov_w <- function(mu, var, remove_ref_cat = TRUE) {
  mx <- tcrossprod(mu, -mu)  # Covariances = - p_i * p_j
  diag(mx) <- var  # Variances equal p_i * (1 - p_i), not p_i ^ 2
  if (remove_ref_cat) mx <- mx[-1, -1]
  return(mx)
}
vcov_w <- sapply(names_w, function(w) create_vcov_w(mu_w[[w]], var_w[[w]]))

# Final assembly of true covariance matrix --------------------------------
vcov_order <- num_yx + sum(sapply(mu_w, function(w) length(w) - 1))
vcov_yxw <- matrix(nrow = vcov_order, ncol = vcov_order)
col_names <- c(names(mu_yxz)[-c(4, 5)], names(mu_w_minus_1))
dimnames(vcov_yxw) <- list(col_names, col_names)

# Adding Cov(Y, W)
vcov_yxw[1, ] <- vcov_yxw[, 1] <- c(var_yxz$y, cov_yx, cov_yx, unlist(cov_yw))

# Adding Cov(Y, X)
vcov_yxw[1:num_yx, 1:num_yx] <- vcov_yxz[-cols_z, -cols_z]

# Adding Cov(X, W)
vcov_yxw["x1", c("w1", "w21", "w22")] <- vcov_yxw[c("w1", "w21", "w22"), "x1"] <- unlist(cov_x1w)
vcov_yxw["x2", c("w1", "w21", "w22")] <- vcov_yxw[c("w1", "w21", "w22"), "x2"] <- unlist(cov_x2w)

# Adding Cov(W, W) for the same Z
vcov_yxw[4, 4] <- vcov_w$w1
vcov_yxw[5:6, 5:6] <- vcov_w$w2

# Adding Cov(W, W) for different Zs
calc_p_mvn_trunc <- function(lo, up, sig, mu = c(0, 0)) {
  mvtnorm::pmvnorm(lo, up, mu, sig)[1] / (pnorm(up[2]) - pnorm(lo[2]))
}

vcov_yxw["w1", c("w21", "w22")] <- vcov_yxw[c("w21", "w22"), "w1"] <-
  c(calc_p_mvn_trunc(c(q_z$w1[2], q_z$w2[2]), c(q_z$w1[3], q_z$w2[3]), sig = matrix(c(1, cov_zz, cov_zz, 1), 2)) * .5 - .6 * .5,
    calc_p_mvn_trunc(c(q_z$w1[2], q_z$w2[3]), c(q_z$w1[3], q_z$w2[4]), sig = matrix(c(1, cov_zz, cov_zz, 1), 2)) * .2 - .6 * .2)

# Creating submatrices
vcov_yx1   <- vcov_yxw[c("y", "x1"), c("y", "x1")]
vcov_yx    <- vcov_yxw[c("y", "x1", "x2"), c("y", "x1", "x2")]
vcov_yw1   <- vcov_yxw[c("y", "w1"), c("y", "w1")]
vcov_yw2   <- vcov_yxw[c("y", "w21", "w22"), c("y", "w21", "w22")]
vcov_yx1w1 <- vcov_yxw[c("y", "x1", "w1"), c("y", "x1", "w1")]
vcov_yx1w2 <- vcov_yxw[c("y", "x1", "w21", "w22"), c("y", "x1", "w21", "w22")]
vcov_yw    <- vcov_yxw[c("y", "w1", "w21", "w22"), c("y", "w1", "w21", "w22")]
vcov_yx1w  <- vcov_yxw[c("y", "x1", "w1", "w21", "w22"), c("y", "x1", "w1", "w21", "w22")]
vcov_yxw1  <- vcov_yxw[c("y", "x1", "x2", "w1"), c("y", "x1", "x2", "w1")]
vcov_yxw2  <- vcov_yxw[c("y", "x1", "x2", "w21", "w22"), c("y", "x1", "x2", "w21", "w22")]

# Calculating regression coefficients -------------------------------------
mu_xw <- c("x1" = mu_yxz$x1, "x2" = mu_yxz$x2, mu_w_minus_1)

calcRegCoeff <- function(cov, mu_y, mu_x) {
  beta <- solve(cov[-1, -1], cov[1, -1])
  alpha <- mu_y - crossprod(beta, mu_x)
  names_coeff <- c("theta", names(mu_x))
  out <- setNames(unlist(list(alpha, beta)), names_coeff)
  return(out)
}

cov_reg_yx1   <- calcRegCoeff(vcov_yx1, mu_yxz$y, mu_xw[c(1)])
cov_reg_yx    <- calcRegCoeff(vcov_yx, mu_yxz$y, mu_xw[c(1, 2)])
cov_reg_yw1   <- calcRegCoeff(vcov_yw1, mu_yxz$y, mu_xw[c(3)])
cov_reg_yw2   <- calcRegCoeff(vcov_yw2, mu_yxz$y, mu_xw[c(4, 5)])
cov_reg_yx1w1 <- calcRegCoeff(vcov_yx1w1, mu_yxz$y, mu_xw[c(1, 3)])
cov_reg_yx1w2 <- calcRegCoeff(vcov_yx1w2, mu_yxz$y, mu_xw[c(1, 4, 5)])
cov_reg_yw    <- calcRegCoeff(vcov_yw, mu_yxz$y, mu_xw[c(3, 4, 5)])
cov_reg_yx1w  <- calcRegCoeff(vcov_yx1w, mu_yxz$y, mu_xw[c(1, 3, 4, 5)])
cov_reg_yxw1  <- calcRegCoeff(vcov_yxw1, mu_yxz$y, mu_xw[c(1, 2, 3)])
cov_reg_yxw2  <- calcRegCoeff(vcov_yxw2, mu_yxz$y, mu_xw[c(1, 2, 4, 5)])
cov_reg_yxw   <- calcRegCoeff(vcov_yxw, mu_yxz$y, mu_xw)

# Using beta_gen
beta_reg_yx1   <- beta_gen(df_yx1)
beta_reg_yx    <- beta_gen(df_yx)
beta_reg_yw1   <- beta_gen(df_yw1)
beta_reg_yw2   <- beta_gen(df_yw2)
beta_reg_yx1w1 <- beta_gen(df_yx1w1)
beta_reg_yx1w2 <- beta_gen(df_yx1w2)
beta_reg_yw    <- beta_gen(df_yw)
beta_reg_yxw1  <- beta_gen(df_yxw1)
beta_reg_yxw2  <- beta_gen(df_yxw2)
beta_reg_yx1w  <- beta_gen(df_yx1w)
beta_reg_yxw   <- beta_gen(df_yxw)

# Benchmarking regression coefficients ------------------------------------
compareRegScrBeta <- function(reg_data, cov_data, beta_gen_data, print = FALSE) {
  tab <- rbind("reg" = coef(reg_data),
               "script" = cov_data,
               "beta_gen" = beta_gen_data)
  if (print) print(tab)
  diffs <- apply(tab, 2, function(x) max(c(x[2] - x[1],
                                           x[3] - x[1],
                                           x[3] - x[2])))
  return(list(tab = tab, diffs = diffs))
}
comp_yx1 <- compareRegScrBeta(reg_yx1, cov_reg_yx1, beta_reg_yx1)
comp_yx  <- compareRegScrBeta(reg_yx, cov_reg_yx, beta_reg_yx)
comp_yw1 <- compareRegScrBeta(reg_yw1, cov_reg_yw1, beta_reg_yw1)
comp_yw2 <- compareRegScrBeta(reg_yw2, cov_reg_yw2, beta_reg_yw2)
comp_yw  <- compareRegScrBeta(reg_yw, cov_reg_yw, beta_reg_yw)
comp_yx1w1 <- compareRegScrBeta(reg_yx1w1, cov_reg_yx1w1, beta_reg_yx1w1)
comp_yx1w2 <- compareRegScrBeta(reg_yx1w2, cov_reg_yx1w2, beta_reg_yx1w2)
comp_yxw1 <- compareRegScrBeta(reg_yxw1, cov_reg_yxw1, beta_reg_yxw1)
comp_yxw2 <- compareRegScrBeta(reg_yxw2, cov_reg_yxw2, beta_reg_yxw2)
comp_yx1w <- compareRegScrBeta(reg_yx1w, cov_reg_yx1w, beta_reg_yx1w)
comp_yxw <- compareRegScrBeta(reg_yxw, cov_reg_yxw, beta_reg_yxw)

test_that("Script and beta_gen solutions are equivalent", {
  expect_equal(comp_yx1$tab["script", ], comp_yx1$tab["beta_gen", ])
  expect_equal(comp_yx$tab["script", ], comp_yx$tab["beta_gen", ])
  expect_equal(comp_yw1$tab["script", ], comp_yw1$tab["beta_gen", ])
  expect_equal(comp_yw2$tab["script", ], comp_yw2$tab["beta_gen", ])
  expect_equal(comp_yx1w1$tab["script", ], comp_yx1w1$tab["beta_gen", ])
  expect_equal(comp_yx1w2$tab["script", ], comp_yx1w2$tab["beta_gen", ])
  expect_equal(comp_yw$tab["script", ], comp_yw$tab["beta_gen", ])
  expect_equal(comp_yxw1$tab["script", ], comp_yxw1$tab["beta_gen", ])
  expect_equal(comp_yxw2$tab["script", ], comp_yxw2$tab["beta_gen", ])
  expect_equal(comp_yx1w$tab["script", ], comp_yx1w$tab["beta_gen", ])
  expect_equal(comp_yxw$tab["script", ], comp_yxw$tab["beta_gen", ])
})

test_that("Numerical and analytical solutions are close", {
  expect_lt(max(comp_yx1$tab["reg", ]   - comp_yx1$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yx$tab["reg", ]    - comp_yx$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yw1$tab["reg", ]   - comp_yw1$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yw2$tab["reg", ]   - comp_yw2$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yx1w1$tab["reg", ] - comp_yx1w1$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yx1w2$tab["reg", ] - comp_yx1w2$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yw$tab["reg", ]    - comp_yw$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yxw1$tab["reg", ]  - comp_yxw1$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yxw2$tab["reg", ]  - comp_yxw2$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yx1w$tab["reg", ]  -  comp_yx1w$tab["beta_gen", ]), 0.1)
  expect_lt(max(comp_yxw$tab["reg", ]   - comp_yxw$tab["beta_gen", ]), 0.1)
})

Try the lsasim package in your browser

Any scripts or data that you put into this service are public.

lsasim documentation built on Aug. 22, 2023, 5:09 p.m.