R/test_cop.R

Defines functions test_cop

# COP standards for Coefficient of Proportionality
# test_cop calculates both versions of COP (Oster's approx & exact)

test_cop <- function(est_eff, # unstandardized
                     std_err, # unstandardized
                     n_obs,
                     n_covariates, # the number of z 
                     sdx,
                     sdy,
                     R2, # NOT the adjusted R2, should be the original R2
                     eff_thr = 0, # this is the unstandardized version
                     FR2max_multiplier = 1.3,
                     FR2max = 0, # NOT the adjusted R2, should be the original R2
                     alpha = 0.05,
                     tails = 2, 
                     to_return = to_return){
  
  ## test example
  # est_eff = .125
  # std_err = .050
  # n_obs = 6174
  # n_covariates = 7
  # sdx = .217
  # sdy = .991 
  # R2 = .251
  # eff_thr = 0
  # FR2max = .61
  # test_cop(est_eff = .4, std_err = .1, n_obs = 290, 
  #          sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short")
  
  ## prepare input
  df <- n_obs - n_covariates - 3 ## df of M3
  var_x <- sdx^2
  var_y <- sdy^2
  
  ### if the user specifies R2max directly then we use the specified R2max 
  if (FR2max == 0){FR2max <- FR2max_multiplier * R2}
  var_z <- sdz <- 1
  
  ## error message if input is inappropriate
  if (!(std_err > 0)) {stop("Did not run! Standard error needs to be 
                            greater than zero.")}
  if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be 
                        greater than zero.")}
  if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be 
                        greater than zero.")}
  if (!(n_obs > n_covariates + 3)) {
    stop("Did not run! There are too few observations relative to the 
         number of observations and covariates. Please specify a less 
         complex model to use KonFound-It.")}
  if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")}
  if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")}
  if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {
    stop("Did not run! Entered values produced Rxz^2 <=0, 
         consider adding more significant digits to your entered values.")}

  # an indicator of whether the use specified est_eff is negative,
  # 1 is yes negative
  negest <- 0 
  if (est_eff < 0) {
    est_eff <- abs(est_eff)
    negest <- 1
  }

  ## now standardize 
  beta_thr <- eff_thr * sdx / sdy
  beta <- est_eff * sdx / sdy
  SE <- std_err * sdx / sdy
  
  ## observed regression, reg y on x Given z
  tyxGz <- beta / SE  ### this should be equal to est_eff / std_err
  ryxGz <- tyxGz / sqrt(df + 1 + tyxGz^2)
  ## df + 1 because omitted variable is NOT included in M2 
  ryxGz_M2 <- tyxGz / sqrt(n_obs + tyxGz^2)
  ## ryxGz_M2 is only for simulation to recover the exact number
  
  ## make sure R2 due to x alone is not larger than overall or observed R2
  if (ryxGz^2 > R2) {illcnd_ryxGz <- TRUE} else {illcond_ryxGz <- FALSE}
  
  ## calculate ryz, rxz, rxy
  ryz <- rzy <- cal_ryz(ryxGz, R2)
  rxz <- rzx <- cal_rxz(var_x, var_y, R2, df + 1, std_err) 
  ## df + 1 because omitted variable is NOT included in M2 
  #### we change the n_obs to df to recover the rxz as in the particular sample

  ## note that in the updated approach rxy is not necessary to calculate 
  ## rxcv_exact, ryxcv_exact and delta_exact
  rxy <- ryx <- cal_rxy(ryxGz, rxz, ryz)
  rxy_M2 <- cal_rxy(ryxGz_M2, rxz, ryz) 
  ## rxy_M2 is only for simulation to recover the exact number
  
  ## baseline regression model, no z (unconditional)
  eff_uncond <- sqrt((var_y / var_x)) * rxy
  t_uncond <- rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2)
  ## n_obs - 2 - adjust the df in the M1 
  std_err_uncond <- eff_uncond / t_uncond
  R2_uncond <- rxy^2
  
  ## calculate delta_star
  delta_star <- cal_delta_star(FR2max, R2, R2_uncond, est_eff, 
                               eff_thr, var_x, var_y, eff_uncond, 
                               rxz, n_obs)
  ## now introduce cv
  sdcv <- var_cv <- 1
  rcvz <- rzcv <- 0
  v <- 1 - rxz^2 # this is to simplify calculation later
  D <- sqrt(FR2max - R2) # same above
  
  ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0)
  rxcv_oster <- rcvx_oster <- delta_star * rxz * (sdcv / sdz)
  if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1)
  {rcvy_oster <- rycv_oster <- 
    D * sqrt(1 - (rcvx_oster^2 / v)) + 
    (ryx * rcvx_oster) / (v) - 
    (ryz * rcvx_oster * rxz) / (v)}
  
  # Checking beta, R2, se generated by delta_star  with a regression
  verify_oster <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, 
                                 rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz)
  
  # prepare some other values in the final Table (long output)
  R2_M3_oster <- as.numeric(verify_oster[1])
  eff_x_M3_oster <- as.numeric(verify_oster[2]) 
  # should be equivalent or very close to eff_thr
  se_x_M3_oster <- as.numeric(verify_oster[3])
  beta_x_M3_oster <- as.numeric(verify_oster[9]) 
  # should be equivalent or very close to beta_thr
  t_x_M3_oster <- eff_x_M3_oster / se_x_M3_oster 
  eff_z_M3_oster <- as.numeric(verify_oster[4])
  se_z_M3_oster <- as.numeric(verify_oster[5])
  eff_cv_M3_oster <- as.numeric(verify_oster[6])
  se_cv_M3_oster <- as.numeric(verify_oster[7])
  cov_oster <- verify_oster[[11]]
  cor_oster <- verify_oster[[12]]
  
  ## calculate the exact/true rcvx, rcvy, delta 
  ## (updated version that does not need rxy)
  ### the idea is to calculate everything conditional on z
  sdxGz <- sdx * sqrt(1 - rxz^2)
  sdyGz <- sdy * sqrt(1 - ryz^2)
  ryxcvGz_exact_sq <- (FR2max - ryz^2) / (1 - ryz^2)
  ### equation 7 in the manuscript
  rxcvGz_exact <- (ryxGz - sdxGz / sdyGz * beta_thr) / 
    sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - 
           2 * ryxGz * sdxGz / sdyGz * beta_thr + 
           ryxcvGz_exact_sq)
  ### equation 6 in the manuscript
  rycvGz_exact <-  ryxGz * rxcvGz_exact + 
    sqrt((ryxcvGz_exact_sq - ryxGz^2) *
           (1 - rxcvGz_exact^2))
  ### now get unconditional exact rxcv and rycv
  rycv_exact <- sqrt(1 - ryz^2) * rycvGz_exact
  rxcv_exact <- sqrt(1 - rxz^2) * rxcvGz_exact
  delta_exact <- rxcv_exact / rxz

  ## previous approach - comment out, but could find in cop_pse_auxiliary
  ## exact_result <- cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz)
  ## rxcv_exact <- rcvx_exact <- as.numeric(exact_result[1])
  ## rycv_exact <- rcvy_exact <- as.numeric(exact_result[2])
  ## delta_exact <- as.numeric(exact_result[3])
  
  # Checking beta, R2, se generated by True/Exact Delta  with a regression
  verify_exact <- verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, 
                                 rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz)
  ### verify_exact[1] == verify_exact[4] == FR2max
  ### verify_exact[2] == eff_thr
  ### verify_exact[5] == beta_thr
  
  # calculate % bias in delta comparing oster's delta_star with true delta
  delta_pctbias <- 100 * (delta_star - delta_exact) / delta_exact
  
  # prepare some other values in the final Table (long output)
  R2_M3 <- as.numeric(verify_exact[1])
  eff_x_M3 <- as.numeric(verify_exact[2]) 
  # should be equivalent or very close to eff_thr
  se_x_M3 <- as.numeric(verify_exact[3])
  beta_x_M3 <- as.numeric(verify_exact[9]) 
  # should be equivalent or very close to beta_thr
  t_x_M3 <- eff_x_M3 / se_x_M3 
  eff_z_M3 <- as.numeric(verify_exact[4])
  se_z_M3 <- as.numeric(verify_exact[5])
  eff_cv_M3 <- as.numeric(verify_exact[6])
  se_cv_M3 <- as.numeric(verify_exact[7])
  cov_exact <- verify_exact[[11]]
  cor_exact <- verify_exact[[12]]
  
  verify_pse_reg_M2 <- verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy)
  R2_M2 <- as.numeric(verify_pse_reg_M2[1])
  eff_x_M2 <- as.numeric(verify_pse_reg_M2[2]) 
  # should be equivalent or very close to est_eff
  se_x_M2 <- as.numeric(verify_pse_reg_M2[3]) 
  # should be equivalent or very close to std_err
  eff_z_M2 <- as.numeric(verify_pse_reg_M2[4]) 
  se_z_M2 <- as.numeric(verify_pse_reg_M2[5]) 
  t_x_M2 <- eff_x_M2 / se_x_M2 
  
  verify_pse_reg_M1 <- verify_reg_uncond(n_obs, sdx, sdy, rxy)
  R2_M1 <- as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2
  eff_x_M1 <- as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx
  se_x_M1 <- as.numeric(verify_pse_reg_M1[3]) 
  t_x_M1 <- eff_x_M1 / se_x_M1 
  
  delta_star_restricted <- ((est_eff - eff_thr)/(eff_x_M1 - est_eff))*
    ((R2_M2 - R2_M1)/(R2_M3 - R2_M2))
  
  fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models
                     eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg  models
                     se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models
                     rxy, ryxGz, beta_x_M3, beta_x_M3_oster,  # std reg coef for X in three reg models
                     t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models
                     # NA, eff_z_M2, eff_z_M3,  eff_z_M3_oster, # reg coef for Z in three reg models
                     # NA, se_z_M2, se_z_M3, se_z_M3_oster,  # se for Z in three reg models
                     # NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models,
                     NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models
                     NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models
                     NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models
                   nrow = 8, ncol = 4, byrow = TRUE) 
  
  rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X",
                        # "coef_Z", "SE_Z", "t_Z",
                        "coef_CV", "SE_CV", "t_CV")
  
  colnames(fTable) <- c("M1:X", "M2:X,Z", 
                        "M3(delta_exact):X,Z,CV",
                        "M3(delta*):X,Z,CV")
  
  ## figure
  figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", 
                             "Intermediate(M2)", eff_x_M2, R2, "exact", 
                             "Final(M3)", eff_x_M3, FR2max, "exact",
                             "Intermediate(M2)", eff_x_M2, R2, "star", 
                             "Final(M3)", eff_x_M3_oster, FR2max, "star"), 
                     nrow = 5, ncol = 4, byrow =TRUE)
  colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat")

  figTable <- as.data.frame(figTable)
  figTable$ModelLabel <- as.character(figTable$ModelLabel)
  figTable$ModelLabel <- factor(figTable$ModelLabel, 
                                levels = c("Baseline(M1)",
                                            "Intermediate(M2)",
                                            "Final(M3)"))
  figTable$cat <- as.character(figTable$cat)
  figTable$cat <- factor(figTable$cat, 
                         levels = c("exact",
                                    "star"))
  figTable$coef_X <- as.numeric(figTable$coef_X)
  figTable$R2 <- as.numeric(figTable$R2)

scale <- 1/(round(max(figTable$coef_X)*10)/10)

fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = figTable$ModelLabel)) +
    ggplot2::geom_point(ggplot2::aes(y = figTable$coef_X, group = cat, shape = cat), color = "blue", size = 3) + 
    ggplot2::scale_shape_manual(values = c(16, 1)) +
    ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + 
    # scale_linetype_manual(values=c("solid", "dotted")) +
    ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") + 
    ggplot2::geom_line(ggplot2::aes(y = figTable$coef_X, group = cat, linetype = cat), color = "blue") + 
    ggplot2::scale_y_continuous(
      # Features of the first axis
      name = "Coeffcient (X)",
      # Add a second axis and specify its features
      sec.axis = ggplot2::sec_axis(~.* scale, 
                          name="R2")) +
    ggplot2::theme(axis.title.x = ggplot2::element_blank(),
          legend.position = "none",
          axis.line.y.right = ggplot2::element_line(color = "#7CAE00"),
          axis.title.y.right = ggplot2::element_text(color = "#7CAE00"),
          axis.text.y.right = ggplot2::element_text(color = "#7CAE00"),
          axis.line.y.left = ggplot2::element_line(color = "blue"),
          axis.title.y.left = ggplot2::element_text(color = "blue"),
          axis.text.y.left = ggplot2::element_text(color = "blue"),
          axis.line.x.bottom = ggplot2::element_line(color = "black"),
          axis.text.x.bottom = ggplot2::element_text(color = "black"))
    
  ##############################################
  ######### conditional RIR ####################
  
  # calculating critical r
  if (est_eff < 0) {
    critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1
  } else {
    critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2)
  }
  critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2))
  
  # final solutions 
  cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / 
    (R2 - ryz^2 + ryz^2 * critical_r^2)
  cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs
  
  cond_RIRpi_null <- 1 - sqrt(critical_r^2 / 
                                (R2 - ryz^2 + ryz^2 * critical_r^2))
  cond_RIR_null <- cond_RIRpi_null * n_obs
  
  cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / 
                                (R2 - ryz^2))
  cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs
  
  ##############################################
  ######### output #############################
  
  if (to_return == "raw_output") {
    if (negest == 1) {
      cat("Using the absolute value of the estimated effect, 
          results can be interpreted by symmetry.")
      cat("\n")
    }
    output <- list("delta*" = delta_star,
                   "delta*restricted" = delta_star_restricted,
                   "delta_exact" = delta_exact, 
                   "delta_pctbias" = delta_pctbias,
                   #"cov_oster" = cov_oster,
                   #"cov_exact" = cov_exact,
                   "cor_oster" = cor_oster,
                   "cor_exact" = cor_exact,
                   "var(Y)" = sdy^2,
                   "var(X)" = sdx^2,
                  #"var(Z)" = sdz^2,
                   "var(CV)" = sdcv^2,
                   "Table" = fTable,
                   "Figure" = fig,
                   "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY,
                   "conditional RIR (fixed y)" = cond_RIR_fixedY,
                   "conditional RIR pi (null)" = cond_RIRpi_null,
                   "conditional RIR (null)" = cond_RIR_null,
                   "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz,
                   "conditional RIR (rxyGz)" = cond_RIR_rxyz)
    return(output)
  }
  
  if (to_return == "print") {
    cat("This function calculates delta* and the exact value of delta.")
    cat("\n")
    if (negest == 1) {
      cat("Using the absolute value of the estimated effect, 
          results can be interpreted by symmetry.")
      cat("\n")
    }
    cat(sprintf("delta* is %.3f (assuming no covariates in the baseline model M1), 
                the exact delta is %.3f, with a bias of %.3f%%.", 
                delta_star, delta_exact, delta_pctbias))
    cat("\n")
    cat(sprintf("With delta*, the coefficient in the final model will be %.3f. 
                With the exact delta, the coefficient will be %.3f.",  
                eff_x_M3_oster,eff_x_M3))
    cat("\n")
    cat("Use to_return = raw_ouput to see more specific results 
        and graphic presentation of the result.")
    cat("\n")
    cat("\n")
    cat("This function also calculates conditional RIR that 
        invalidates the statistical inference.")
    cat("\n")
    cat("If the replacement cases have a fixed value, then RIR =", cond_RIR_fixedY, ".")
    cat("\n")
    cat("If the replacement cases follow a null distribution, 
        then RIR =", cond_RIR_null, ".")
    cat("\n")
    cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", cond_RIR_rxyz, ".")
    cat("\n")
    cat("\n")
  }
  
}
jrosen48/konfound documentation built on April 13, 2024, 3:47 a.m.