# COP standards for Coefficient of Proportionality
# test_cop calculates both versions of COP (Oster's approx & exact)
test_cop <- function(est_eff, # unstandardized
std_err, # unstandardized
n_obs,
n_covariates, # the number of z
sdx,
sdy,
R2, # NOT the adjusted R2, should be the original R2
eff_thr = 0, # this is the unstandardized version
FR2max_multiplier = 1.3,
FR2max = 0, # NOT the adjusted R2, should be the original R2
alpha = 0.05,
tails = 2,
to_return = to_return){
## test example
# est_eff = .125
# std_err = .050
# n_obs = 6174
# n_covariates = 7
# sdx = .217
# sdy = .991
# R2 = .251
# eff_thr = 0
# FR2max = .61
# test_cop(est_eff = .4, std_err = .1, n_obs = 290,
# sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short")
## prepare input
df <- n_obs - n_covariates - 3 ## df of M3
var_x <- sdx^2
var_y <- sdy^2
### if the user specifies R2max directly then we use the specified R2max
if (FR2max == 0){FR2max <- FR2max_multiplier * R2}
var_z <- sdz <- 1
## error message if input is inappropriate
if (!(std_err > 0)) {stop("Did not run! Standard error needs to be
greater than zero.")}
if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be
greater than zero.")}
if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be
greater than zero.")}
if (!(n_obs > n_covariates + 3)) {
stop("Did not run! There are too few observations relative to the
number of observations and covariates. Please specify a less
complex model to use KonFound-It.")}
if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")}
if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")}
if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {
stop("Did not run! Entered values produced Rxz^2 <=0,
consider adding more significant digits to your entered values.")}
# an indicator of whether the use specified est_eff is negative,
# 1 is yes negative
negest <- 0
if (est_eff < 0) {
est_eff <- abs(est_eff)
negest <- 1
}
## now standardize
beta_thr <- eff_thr * sdx / sdy
beta <- est_eff * sdx / sdy
SE <- std_err * sdx / sdy
## observed regression, reg y on x Given z
tyxGz <- beta / SE ### this should be equal to est_eff / std_err
ryxGz <- tyxGz / sqrt(df + 1 + tyxGz^2)
## df + 1 because omitted variable is NOT included in M2
ryxGz_M2 <- tyxGz / sqrt(n_obs + tyxGz^2)
## ryxGz_M2 is only for simulation to recover the exact number
## make sure R2 due to x alone is not larger than overall or observed R2
if (ryxGz^2 > R2) {illcnd_ryxGz <- TRUE} else {illcond_ryxGz <- FALSE}
## calculate ryz, rxz, rxy
ryz <- rzy <- cal_ryz(ryxGz, R2)
rxz <- rzx <- cal_rxz(var_x, var_y, R2, df + 1, std_err)
## df + 1 because omitted variable is NOT included in M2
#### we change the n_obs to df to recover the rxz as in the particular sample
## note that in the updated approach rxy is not necessary to calculate
## rxcv_exact, ryxcv_exact and delta_exact
rxy <- ryx <- cal_rxy(ryxGz, rxz, ryz)
rxy_M2 <- cal_rxy(ryxGz_M2, rxz, ryz)
## rxy_M2 is only for simulation to recover the exact number
## baseline regression model, no z (unconditional)
eff_uncond <- sqrt((var_y / var_x)) * rxy
t_uncond <- rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2)
## n_obs - 2 - adjust the df in the M1
std_err_uncond <- eff_uncond / t_uncond
R2_uncond <- rxy^2
## calculate delta_star
delta_star <- cal_delta_star(FR2max, R2, R2_uncond, est_eff,
eff_thr, var_x, var_y, eff_uncond,
rxz, n_obs)
## now introduce cv
sdcv <- var_cv <- 1
rcvz <- rzcv <- 0
v <- 1 - rxz^2 # this is to simplify calculation later
D <- sqrt(FR2max - R2) # same above
## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0)
rxcv_oster <- rcvx_oster <- delta_star * rxz * (sdcv / sdz)
if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1)
{rcvy_oster <- rycv_oster <-
D * sqrt(1 - (rcvx_oster^2 / v)) +
(ryx * rcvx_oster) / (v) -
(ryz * rcvx_oster * rxz) / (v)}
# Checking beta, R2, se generated by delta_star with a regression
verify_oster <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv,
rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz)
# prepare some other values in the final Table (long output)
R2_M3_oster <- as.numeric(verify_oster[1])
eff_x_M3_oster <- as.numeric(verify_oster[2])
# should be equivalent or very close to eff_thr
se_x_M3_oster <- as.numeric(verify_oster[3])
beta_x_M3_oster <- as.numeric(verify_oster[9])
# should be equivalent or very close to beta_thr
t_x_M3_oster <- eff_x_M3_oster / se_x_M3_oster
eff_z_M3_oster <- as.numeric(verify_oster[4])
se_z_M3_oster <- as.numeric(verify_oster[5])
eff_cv_M3_oster <- as.numeric(verify_oster[6])
se_cv_M3_oster <- as.numeric(verify_oster[7])
cov_oster <- verify_oster[[11]]
cor_oster <- verify_oster[[12]]
## calculate the exact/true rcvx, rcvy, delta
## (updated version that does not need rxy)
### the idea is to calculate everything conditional on z
sdxGz <- sdx * sqrt(1 - rxz^2)
sdyGz <- sdy * sqrt(1 - ryz^2)
ryxcvGz_exact_sq <- (FR2max - ryz^2) / (1 - ryz^2)
### equation 7 in the manuscript
rxcvGz_exact <- (ryxGz - sdxGz / sdyGz * beta_thr) /
sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) -
2 * ryxGz * sdxGz / sdyGz * beta_thr +
ryxcvGz_exact_sq)
### equation 6 in the manuscript
rycvGz_exact <- ryxGz * rxcvGz_exact +
sqrt((ryxcvGz_exact_sq - ryxGz^2) *
(1 - rxcvGz_exact^2))
### now get unconditional exact rxcv and rycv
rycv_exact <- sqrt(1 - ryz^2) * rycvGz_exact
rxcv_exact <- sqrt(1 - rxz^2) * rxcvGz_exact
delta_exact <- rxcv_exact / rxz
## previous approach - comment out, but could find in cop_pse_auxiliary
## exact_result <- cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz)
## rxcv_exact <- rcvx_exact <- as.numeric(exact_result[1])
## rycv_exact <- rcvy_exact <- as.numeric(exact_result[2])
## delta_exact <- as.numeric(exact_result[3])
# Checking beta, R2, se generated by True/Exact Delta with a regression
verify_exact <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv,
rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz)
### verify_exact[1] == verify_exact[4] == FR2max
### verify_exact[2] == eff_thr
### verify_exact[5] == beta_thr
# calculate % bias in delta comparing oster's delta_star with true delta
delta_pctbias <- 100 * (delta_star - delta_exact) / delta_exact
# prepare some other values in the final Table (long output)
R2_M3 <- as.numeric(verify_exact[1])
eff_x_M3 <- as.numeric(verify_exact[2])
# should be equivalent or very close to eff_thr
se_x_M3 <- as.numeric(verify_exact[3])
beta_x_M3 <- as.numeric(verify_exact[9])
# should be equivalent or very close to beta_thr
t_x_M3 <- eff_x_M3 / se_x_M3
eff_z_M3 <- as.numeric(verify_exact[4])
se_z_M3 <- as.numeric(verify_exact[5])
eff_cv_M3 <- as.numeric(verify_exact[6])
se_cv_M3 <- as.numeric(verify_exact[7])
cov_exact <- verify_exact[[11]]
cor_exact <- verify_exact[[12]]
verify_pse_reg_M2 <- verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy)
R2_M2 <- as.numeric(verify_pse_reg_M2[1])
eff_x_M2 <- as.numeric(verify_pse_reg_M2[2])
# should be equivalent or very close to est_eff
se_x_M2 <- as.numeric(verify_pse_reg_M2[3])
# should be equivalent or very close to std_err
eff_z_M2 <- as.numeric(verify_pse_reg_M2[4])
se_z_M2 <- as.numeric(verify_pse_reg_M2[5])
t_x_M2 <- eff_x_M2 / se_x_M2
verify_pse_reg_M1 <- verify_reg_uncond(n_obs, sdx, sdy, rxy)
R2_M1 <- as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2
eff_x_M1 <- as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx
se_x_M1 <- as.numeric(verify_pse_reg_M1[3])
t_x_M1 <- eff_x_M1 / se_x_M1
delta_star_restricted <- ((est_eff - eff_thr)/(eff_x_M1 - est_eff))*
((R2_M2 - R2_M1)/(R2_M3 - R2_M2))
fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models
eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg models
se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models
rxy, ryxGz, beta_x_M3, beta_x_M3_oster, # std reg coef for X in three reg models
t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models
# NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, # reg coef for Z in three reg models
# NA, se_z_M2, se_z_M3, se_z_M3_oster, # se for Z in three reg models
# NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models,
NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models
NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models
NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models
nrow = 8, ncol = 4, byrow = TRUE)
rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X",
# "coef_Z", "SE_Z", "t_Z",
"coef_CV", "SE_CV", "t_CV")
colnames(fTable) <- c("M1:X", "M2:X,Z",
"M3(delta_exact):X,Z,CV",
"M3(delta*):X,Z,CV")
## figure
figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact",
"Intermediate(M2)", eff_x_M2, R2, "exact",
"Final(M3)", eff_x_M3, FR2max, "exact",
"Intermediate(M2)", eff_x_M2, R2, "star",
"Final(M3)", eff_x_M3_oster, FR2max, "star"),
nrow = 5, ncol = 4, byrow =TRUE)
colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat")
figTable <- as.data.frame(figTable)
figTable$ModelLabel <- as.character(figTable$ModelLabel)
figTable$ModelLabel <- factor(figTable$ModelLabel,
levels = c("Baseline(M1)",
"Intermediate(M2)",
"Final(M3)"))
figTable$cat <- as.character(figTable$cat)
figTable$cat <- factor(figTable$cat,
levels = c("exact",
"star"))
figTable$coef_X <- as.numeric(figTable$coef_X)
figTable$R2 <- as.numeric(figTable$R2)
scale <- 1/(round(max(figTable$coef_X)*10)/10)
fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = figTable$ModelLabel)) +
ggplot2::geom_point(ggplot2::aes(y = figTable$coef_X, group = cat, shape = cat), color = "blue", size = 3) +
ggplot2::scale_shape_manual(values = c(16, 1)) +
ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) +
# scale_linetype_manual(values=c("solid", "dotted")) +
ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") +
ggplot2::geom_line(ggplot2::aes(y = figTable$coef_X, group = cat, linetype = cat), color = "blue") +
ggplot2::scale_y_continuous(
# Features of the first axis
name = "Coeffcient (X)",
# Add a second axis and specify its features
sec.axis = ggplot2::sec_axis(~.* scale,
name="R2")) +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
legend.position = "none",
axis.line.y.right = ggplot2::element_line(color = "#7CAE00"),
axis.title.y.right = ggplot2::element_text(color = "#7CAE00"),
axis.text.y.right = ggplot2::element_text(color = "#7CAE00"),
axis.line.y.left = ggplot2::element_line(color = "blue"),
axis.title.y.left = ggplot2::element_text(color = "blue"),
axis.text.y.left = ggplot2::element_text(color = "blue"),
axis.line.x.bottom = ggplot2::element_line(color = "black"),
axis.text.x.bottom = ggplot2::element_text(color = "black"))
##############################################
######### conditional RIR ####################
# calculating critical r
if (est_eff < 0) {
critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1
} else {
critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2)
}
critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2))
# final solutions
cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) /
(R2 - ryz^2 + ryz^2 * critical_r^2)
cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs
cond_RIRpi_null <- 1 - sqrt(critical_r^2 /
(R2 - ryz^2 + ryz^2 * critical_r^2))
cond_RIR_null <- cond_RIRpi_null * n_obs
cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) /
(R2 - ryz^2))
cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs
##############################################
######### output #############################
if (to_return == "raw_output") {
if (negest == 1) {
cat("Using the absolute value of the estimated effect,
results can be interpreted by symmetry.")
cat("\n")
}
output <- list("delta*" = delta_star,
"delta*restricted" = delta_star_restricted,
"delta_exact" = delta_exact,
"delta_pctbias" = delta_pctbias,
#"cov_oster" = cov_oster,
#"cov_exact" = cov_exact,
"cor_oster" = cor_oster,
"cor_exact" = cor_exact,
"var(Y)" = sdy^2,
"var(X)" = sdx^2,
#"var(Z)" = sdz^2,
"var(CV)" = sdcv^2,
"Table" = fTable,
"Figure" = fig,
"conditional RIR pi (fixed y)" = cond_RIRpi_fixedY,
"conditional RIR (fixed y)" = cond_RIR_fixedY,
"conditional RIR pi (null)" = cond_RIRpi_null,
"conditional RIR (null)" = cond_RIR_null,
"conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz,
"conditional RIR (rxyGz)" = cond_RIR_rxyz)
return(output)
}
if (to_return == "print") {
cat("This function calculates delta* and the exact value of delta.")
cat("\n")
if (negest == 1) {
cat("Using the absolute value of the estimated effect,
results can be interpreted by symmetry.")
cat("\n")
}
cat(sprintf("delta* is %.3f (assuming no covariates in the baseline model M1),
the exact delta is %.3f, with a bias of %.3f%%.",
delta_star, delta_exact, delta_pctbias))
cat("\n")
cat(sprintf("With delta*, the coefficient in the final model will be %.3f.
With the exact delta, the coefficient will be %.3f.",
eff_x_M3_oster,eff_x_M3))
cat("\n")
cat("Use to_return = raw_ouput to see more specific results
and graphic presentation of the result.")
cat("\n")
cat("\n")
cat("This function also calculates conditional RIR that
invalidates the statistical inference.")
cat("\n")
cat("If the replacement cases have a fixed value, then RIR =", cond_RIR_fixedY, ".")
cat("\n")
cat("If the replacement cases follow a null distribution,
then RIR =", cond_RIR_null, ".")
cat("\n")
cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", cond_RIR_rxyz, ".")
cat("\n")
cat("\n")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.