Nothing
# Setup -------------------------------------------------------------------
context("Script vs. beta_gen-calculated regression coefficients")
n <- 1e4
cum_prob_xw <- list(1, 1, 1, c(.4, 1), c(.3, .8, 1))
cum_prob_yx1 <- list(1, 1)
cum_prob_yx <- list(1, 1, 1)
cum_prob_yw1 <- list(1, c(.4, 1))
cum_prob_yw2 <- list(1, c(.3, .8, 1))
cum_prob_yw <- list(1, c(.4, 1), c(.3, .8, 1))
cum_prob_yx1w1 <- list(1, 1, c(.4, 1))
cum_prob_yx1w2 <- list(1, 1, c(.3, .8, 1))
cum_prob_yx1w <- list(1, 1, c(.4, 1), c(.3, .8, 1))
cum_prob_yxw1 <- list(1, 1, 1, c(.4, 1))
cum_prob_yxw2 <- list(1, 1, 1, c(.3, .8, 1))
cum_prob_yxw <- list(1, 1, 1, c(.4, 1), c(.3, .8, 1))
cov_mx_yxw <- matrix(c(1, .1, .1, .2, .2,
.1, 1, .3, .4, .4,
.1, .3, 1, .4, .4,
.2, .4, .4, 1, .5,
.2, .4, .4, .5, 1), 5)
cov_mx_yx1 <- cov_mx_yxw[c(1, 2), c(1, 2)]
cov_mx_yx <- cov_mx_yxw[c(1, 2, 3), c(1, 2, 3)]
cov_mx_yw1 <- cov_mx_yxw[c(1, 4), c(1, 4)]
cov_mx_yw2 <- cov_mx_yxw[c(1, 5), c(1, 5)]
cov_mx_yw <- cov_mx_yxw[c(1, 4, 5), c(1, 4, 5)]
cov_mx_yx1w1 <- cov_mx_yxw[c(1, 2, 4), c(1, 2, 4)]
cov_mx_yx1w2 <- cov_mx_yxw[c(1, 2, 5), c(1, 2, 5)]
cov_mx_yx1w <- cov_mx_yxw[c(1, 2, 4, 5), c(1, 2, 4, 5)]
cov_mx_yxw1 <- cov_mx_yxw[c(1, 2, 3, 4), c(1, 2, 3, 4)]
cov_mx_yxw2 <- cov_mx_yxw[c(1, 2, 3, 5), c(1, 2, 3, 5)]
mu_yx <- 0:2
sd_yx <- 1:3
# Generate data -----------------------------------------------------------
q_gen <- function(data, cov_mx, mean) {
suppressMessages(questionnaire_gen(n, data, cov_matrix = cov_mx, theta = TRUE,
family = "gaussian", full_output = TRUE,
c_mean = mean))
}
df_yx1 <- q_gen(cum_prob_yx1, cov_mx_yx1, mu_yx[c(1, 2)])
df_yx <- q_gen(cum_prob_yx, cov_mx_yx, mu_yx[c(1, 2, 3)])
df_yw1 <- q_gen(cum_prob_yw1, cov_mx_yw1, mu_yx[c(1)])
df_yw2 <- q_gen(cum_prob_yw2, cov_mx_yw2, mu_yx[c(1)])
df_yw <- q_gen(cum_prob_yw, cov_mx_yw, mu_yx[c(1)])
df_yx1w1 <- q_gen(cum_prob_yx1w1, cov_mx_yx1w1, mu_yx[c(1, 2)])
df_yx1w2 <- q_gen(cum_prob_yx1w2, cov_mx_yx1w2, mu_yx[c(1, 2)])
df_yx1w <- q_gen(cum_prob_yx1w, cov_mx_yx1w, mu_yx[c(1:2)])
df_yxw1 <- q_gen(cum_prob_yxw1, cov_mx_yxw1, mu_yx[c(1:3)])
df_yxw2 <- q_gen(cum_prob_yxw2, cov_mx_yxw2, mu_yx[c(1:3)])
df_yxw <- q_gen(cum_prob_yxw, cov_mx_yxw, mu_yx[c(1:3)])
yx1 <- setNames(df_yx1$bg, c("subject", "y", "x1"))
yx <- setNames(df_yx$bg, c("subject", "y", "x1", "x2"))
yw1 <- setNames(df_yw1$bg, c("subject", "y", "w1"))
yw2 <- setNames(df_yw2$bg, c("subject", "y", "w2"))
yw <- setNames(df_yw$bg, c("subject", "y", "w1", "w2"))
yx1w1 <- setNames(df_yx1w1$bg, c("subject", "y", "x1", "w1"))
yx1w2 <- setNames(df_yx1w2$bg, c("subject", "y", "x1","w2"))
yx1w <- setNames(df_yx1w$bg, c("subject", "y", "x1", "w1", "w2"))
yxw1 <- setNames(df_yxw1$bg, c("subject", "y", "x1", "x2", "w1"))
yxw2 <- setNames(df_yxw2$bg, c("subject", "y", "x1", "x2", "w2"))
yxw <- setNames(df_yxw$bg, c("subject", "y", "x1", "x2", "w1", "w2"))
# Test data (to be removed) ------------------------------------------
mu_yxz <- list(y = mu_yx[1], x1 = mu_yx[2], x2 = mu_yx[3], z1 = 0, z2 = 0)
var_yxz <- list(y = sd_yx[1] ^ 2, x1 = sd_yx[2] ^ 2, x2 = sd_yx[3] ^ 2, z1 = 1, z2 = 1)
sd_yxz <- lapply(var_yxz, sqrt)
num_yx <- 3
cols_z <- 4:5
# Discrete variables generated from Z
cum_prob <- list(w1 = c(0, .4, 1), w2 = c(0, .3, .8, 1)) # != format from q_gen
names_w <- names(cum_prob)
mu_w <- lapply(cum_prob, diff)
mu_w_minus_1 <- unlist(sapply(mu_w, function(w) w[-1]))
var_w <- lapply(mu_w, function(p) p * (1 - p))
sd_w <- lapply(var_w, sqrt)
cov_yx <- .1
cov_yz <- .2
cov_xx <- .3
cov_xz <- .4
cov_zz <- .5
vcov_yxz <- cov_mx_yxw
dimnames(vcov_yxz) <- list(names(mu_yxz), names(mu_yxz))
q_z <- lapply(cum_prob, function(x) qnorm(x, mean = 0, sd = 1)) # Z ~ N(0, 1)
# Regressions -------------------------------------------------------------
reg_yx1 <- lm(y ~ x1 , yx1)
reg_yx <- lm(y ~ x1 + x2 , yx)
reg_yw1 <- lm(y ~ w1 , yw1)
reg_yw2 <- lm(y ~ w2, yw2)
reg_yw <- lm(y ~ w1 + w2, yw)
reg_yx1w1 <- lm(y ~ x1 + w1 , yx1w1)
reg_yx1w2 <- lm(y ~ x1 + w2, yx1w2)
reg_yx1w <- lm(y ~ x1 + w1 + w2, yx1w)
reg_yxw1 <- lm(y ~ x1 + x2 + w1 , yxw1)
reg_yxw2 <- lm(y ~ x1 + x2 + w2, yxw2)
reg_yxw <- lm(y ~ x1 + x2 + w1 + w2, yxw)
# Calculating elements of vcov_yxw ----------------------------------------
# Missing pieces: all covariances involving W, i.e., Cov(Y, W), Cov(X, W) and
# Cov(W, W). Calculations below:
exp_X_given_Y <- function(lo_lim, up_lim, mu_X = 0, cov_XY = .5, mu_Y = 0, sd_Y = 1) {
a <- (lo_lim - mu_Y) / sd_Y
b <- (up_lim - mu_Y) / sd_Y
return(mu_X + (cov_XY / sd_Y) * (dnorm(a) - dnorm(b)) / (pnorm(b) - pnorm(a)))
}
# Calculates E(XW) or E(YW) for covariance later on
exp_AB2 <- function(names_b, mu_a, mu_b, cov_ab, q, ...) {
exp_ab <- list()
for (b in names_b) {
exp_ab[[b]] <- vector()
for (i in seq_along(mu_b[[b]])) {
exp_ab[[b]][i] <- mu_b[[b]][i] *
exp_X_given_Y(q[[b]][i], q[[b]][i + 1], mu_a, cov_ab, ...)
}
}
return(exp_ab)
}
exp_yw <- exp_AB2(names_w, mu_yxz$y, mu_w, cov_yz, q_z)
exp_x1w <- exp_AB2(names_w, mu_yxz$x1, mu_w, cov_xz, q_z, sd_Y = sd_yxz$y)
exp_x2w <- exp_AB2(names_w, mu_yxz$x2, mu_w, cov_xz, q_z, sd_Y = sd_yxz$y)
cov_AB <- function(names_b, exp_ab, mu_a, mu_b) {
covar <- sapply(names_b, function(b) (exp_ab[[b]] - mu_a * mu_b[[b]])[-1])
return(covar)
}
# [-1] below removes first category
# From Cov(X, W) = E(XW) - E(X)E(W)
cov_yw <- cov_AB(names_w, exp_yw, mu_yxz$y, mu_w)
cov_x1w <- cov_AB(names_w, exp_x1w, mu_yxz$x1, mu_w)
cov_x2w <- cov_AB(names_w, exp_x2w, mu_yxz$x2, mu_w)
# Covariance matrix of the categories of W
create_vcov_w <- function(mu, var, remove_ref_cat = TRUE) {
mx <- tcrossprod(mu, -mu) # Covariances = - p_i * p_j
diag(mx) <- var # Variances equal p_i * (1 - p_i), not p_i ^ 2
if (remove_ref_cat) mx <- mx[-1, -1]
return(mx)
}
vcov_w <- sapply(names_w, function(w) create_vcov_w(mu_w[[w]], var_w[[w]]))
# Final assembly of true covariance matrix --------------------------------
vcov_order <- num_yx + sum(sapply(mu_w, function(w) length(w) - 1))
vcov_yxw <- matrix(nrow = vcov_order, ncol = vcov_order)
col_names <- c(names(mu_yxz)[-c(4, 5)], names(mu_w_minus_1))
dimnames(vcov_yxw) <- list(col_names, col_names)
# Adding Cov(Y, W)
vcov_yxw[1, ] <- vcov_yxw[, 1] <- c(var_yxz$y, cov_yx, cov_yx, unlist(cov_yw))
# Adding Cov(Y, X)
vcov_yxw[1:num_yx, 1:num_yx] <- vcov_yxz[-cols_z, -cols_z]
# Adding Cov(X, W)
vcov_yxw["x1", c("w1", "w21", "w22")] <- vcov_yxw[c("w1", "w21", "w22"), "x1"] <- unlist(cov_x1w)
vcov_yxw["x2", c("w1", "w21", "w22")] <- vcov_yxw[c("w1", "w21", "w22"), "x2"] <- unlist(cov_x2w)
# Adding Cov(W, W) for the same Z
vcov_yxw[4, 4] <- vcov_w$w1
vcov_yxw[5:6, 5:6] <- vcov_w$w2
# Adding Cov(W, W) for different Zs
calc_p_mvn_trunc <- function(lo, up, sig, mu = c(0, 0)) {
mvtnorm::pmvnorm(lo, up, mu, sig)[1] / (pnorm(up[2]) - pnorm(lo[2]))
}
vcov_yxw["w1", c("w21", "w22")] <- vcov_yxw[c("w21", "w22"), "w1"] <-
c(calc_p_mvn_trunc(c(q_z$w1[2], q_z$w2[2]), c(q_z$w1[3], q_z$w2[3]), sig = matrix(c(1, cov_zz, cov_zz, 1), 2)) * .5 - .6 * .5,
calc_p_mvn_trunc(c(q_z$w1[2], q_z$w2[3]), c(q_z$w1[3], q_z$w2[4]), sig = matrix(c(1, cov_zz, cov_zz, 1), 2)) * .2 - .6 * .2)
# Creating submatrices
vcov_yx1 <- vcov_yxw[c("y", "x1"), c("y", "x1")]
vcov_yx <- vcov_yxw[c("y", "x1", "x2"), c("y", "x1", "x2")]
vcov_yw1 <- vcov_yxw[c("y", "w1"), c("y", "w1")]
vcov_yw2 <- vcov_yxw[c("y", "w21", "w22"), c("y", "w21", "w22")]
vcov_yx1w1 <- vcov_yxw[c("y", "x1", "w1"), c("y", "x1", "w1")]
vcov_yx1w2 <- vcov_yxw[c("y", "x1", "w21", "w22"), c("y", "x1", "w21", "w22")]
vcov_yw <- vcov_yxw[c("y", "w1", "w21", "w22"), c("y", "w1", "w21", "w22")]
vcov_yx1w <- vcov_yxw[c("y", "x1", "w1", "w21", "w22"), c("y", "x1", "w1", "w21", "w22")]
vcov_yxw1 <- vcov_yxw[c("y", "x1", "x2", "w1"), c("y", "x1", "x2", "w1")]
vcov_yxw2 <- vcov_yxw[c("y", "x1", "x2", "w21", "w22"), c("y", "x1", "x2", "w21", "w22")]
# Calculating regression coefficients -------------------------------------
mu_xw <- c("x1" = mu_yxz$x1, "x2" = mu_yxz$x2, mu_w_minus_1)
calcRegCoeff <- function(cov, mu_y, mu_x) {
beta <- solve(cov[-1, -1], cov[1, -1])
alpha <- mu_y - crossprod(beta, mu_x)
names_coeff <- c("theta", names(mu_x))
out <- setNames(unlist(list(alpha, beta)), names_coeff)
return(out)
}
cov_reg_yx1 <- calcRegCoeff(vcov_yx1, mu_yxz$y, mu_xw[c(1)])
cov_reg_yx <- calcRegCoeff(vcov_yx, mu_yxz$y, mu_xw[c(1, 2)])
cov_reg_yw1 <- calcRegCoeff(vcov_yw1, mu_yxz$y, mu_xw[c(3)])
cov_reg_yw2 <- calcRegCoeff(vcov_yw2, mu_yxz$y, mu_xw[c(4, 5)])
cov_reg_yx1w1 <- calcRegCoeff(vcov_yx1w1, mu_yxz$y, mu_xw[c(1, 3)])
cov_reg_yx1w2 <- calcRegCoeff(vcov_yx1w2, mu_yxz$y, mu_xw[c(1, 4, 5)])
cov_reg_yw <- calcRegCoeff(vcov_yw, mu_yxz$y, mu_xw[c(3, 4, 5)])
cov_reg_yx1w <- calcRegCoeff(vcov_yx1w, mu_yxz$y, mu_xw[c(1, 3, 4, 5)])
cov_reg_yxw1 <- calcRegCoeff(vcov_yxw1, mu_yxz$y, mu_xw[c(1, 2, 3)])
cov_reg_yxw2 <- calcRegCoeff(vcov_yxw2, mu_yxz$y, mu_xw[c(1, 2, 4, 5)])
cov_reg_yxw <- calcRegCoeff(vcov_yxw, mu_yxz$y, mu_xw)
# Using beta_gen
beta_reg_yx1 <- beta_gen(df_yx1)
beta_reg_yx <- beta_gen(df_yx)
beta_reg_yw1 <- beta_gen(df_yw1)
beta_reg_yw2 <- beta_gen(df_yw2)
beta_reg_yx1w1 <- beta_gen(df_yx1w1)
beta_reg_yx1w2 <- beta_gen(df_yx1w2)
beta_reg_yw <- beta_gen(df_yw)
beta_reg_yxw1 <- beta_gen(df_yxw1)
beta_reg_yxw2 <- beta_gen(df_yxw2)
beta_reg_yx1w <- beta_gen(df_yx1w)
beta_reg_yxw <- beta_gen(df_yxw)
# Benchmarking regression coefficients ------------------------------------
compareRegScrBeta <- function(reg_data, cov_data, beta_gen_data, print = FALSE) {
tab <- rbind("reg" = coef(reg_data),
"script" = cov_data,
"beta_gen" = beta_gen_data)
if (print) print(tab)
diffs <- apply(tab, 2, function(x) max(c(x[2] - x[1],
x[3] - x[1],
x[3] - x[2])))
return(list(tab = tab, diffs = diffs))
}
comp_yx1 <- compareRegScrBeta(reg_yx1, cov_reg_yx1, beta_reg_yx1)
comp_yx <- compareRegScrBeta(reg_yx, cov_reg_yx, beta_reg_yx)
comp_yw1 <- compareRegScrBeta(reg_yw1, cov_reg_yw1, beta_reg_yw1)
comp_yw2 <- compareRegScrBeta(reg_yw2, cov_reg_yw2, beta_reg_yw2)
comp_yw <- compareRegScrBeta(reg_yw, cov_reg_yw, beta_reg_yw)
comp_yx1w1 <- compareRegScrBeta(reg_yx1w1, cov_reg_yx1w1, beta_reg_yx1w1)
comp_yx1w2 <- compareRegScrBeta(reg_yx1w2, cov_reg_yx1w2, beta_reg_yx1w2)
comp_yxw1 <- compareRegScrBeta(reg_yxw1, cov_reg_yxw1, beta_reg_yxw1)
comp_yxw2 <- compareRegScrBeta(reg_yxw2, cov_reg_yxw2, beta_reg_yxw2)
comp_yx1w <- compareRegScrBeta(reg_yx1w, cov_reg_yx1w, beta_reg_yx1w)
comp_yxw <- compareRegScrBeta(reg_yxw, cov_reg_yxw, beta_reg_yxw)
test_that("Script and beta_gen solutions are equivalent", {
expect_equal(comp_yx1$tab["script", ], comp_yx1$tab["beta_gen", ])
expect_equal(comp_yx$tab["script", ], comp_yx$tab["beta_gen", ])
expect_equal(comp_yw1$tab["script", ], comp_yw1$tab["beta_gen", ])
expect_equal(comp_yw2$tab["script", ], comp_yw2$tab["beta_gen", ])
expect_equal(comp_yx1w1$tab["script", ], comp_yx1w1$tab["beta_gen", ])
expect_equal(comp_yx1w2$tab["script", ], comp_yx1w2$tab["beta_gen", ])
expect_equal(comp_yw$tab["script", ], comp_yw$tab["beta_gen", ])
expect_equal(comp_yxw1$tab["script", ], comp_yxw1$tab["beta_gen", ])
expect_equal(comp_yxw2$tab["script", ], comp_yxw2$tab["beta_gen", ])
expect_equal(comp_yx1w$tab["script", ], comp_yx1w$tab["beta_gen", ])
expect_equal(comp_yxw$tab["script", ], comp_yxw$tab["beta_gen", ])
})
test_that("Numerical and analytical solutions are close", {
expect_lt(max(comp_yx1$tab["reg", ] - comp_yx1$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yx$tab["reg", ] - comp_yx$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yw1$tab["reg", ] - comp_yw1$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yw2$tab["reg", ] - comp_yw2$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yx1w1$tab["reg", ] - comp_yx1w1$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yx1w2$tab["reg", ] - comp_yx1w2$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yw$tab["reg", ] - comp_yw$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yxw1$tab["reg", ] - comp_yxw1$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yxw2$tab["reg", ] - comp_yxw2$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yx1w$tab["reg", ] - comp_yx1w$tab["beta_gen", ]), 0.1)
expect_lt(max(comp_yxw$tab["reg", ] - comp_yxw$tab["beta_gen", ]), 0.1)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.