R/measures.R
In jointVIP: Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

Documented in check_measures get_boot_measures get_measures get_post_measures

#' Prepare data frame to plot standardized omitted variable bias
#' Marginal standardized mean differences and outcome correlation
#'
#' @param object jointVIP object
#' @param smd calculate standardized mean difference either using `cross-sample` or `pooled`
#' @return measures needed for jointVIP
#' @export
#' @importFrom stats sd var cor complete.cases
get_measures = function(object, smd="cross-sample"){
  treated <- object$analysis_df[, object$treatment]
  covariates <- names(object$analysis_df)[!(names(object$analysis_df)
                                          %in% c(object$treatment,
                                                 object$outcome))]

  md <- apply(object$analysis_df[,covariates], 2,
              function(x){
                mean(x[treated == 1]) - mean(x[treated == 0])
              })

  cs_denom <- apply(object$pilot_df[,covariates], 2, stats::sd)
  pooled_denom <- apply(object$analysis_df[,covariates], 2,
                          function(x){
                            if(stats::var(x[treated == 1]) == 0 &
                               stats::var(x[treated == 0]) == 0){NA}
                            else{
                              sqrt(stats::var(x[treated == 1])/2 +
                                     stats::var(x[treated == 0])/2)
                            }
                          })
  outcome_cor <- apply(object$pilot_df[,covariates],
                       2,
                       function(x){
                         stats::cor(x, object$pilot_df[,object$outcome])
                       })

  if(!smd %in% c("cross-sample", "pooled")){
    stop("smd options only include `cross-sample` or `pooled`")
  } else {
    smd_calc <- if(smd=="cross-sample"){md/cs_denom}else{md/pooled_denom}
  }

  measures = data.frame(
    outcome_cor = outcome_cor,
    std_md = smd_calc,
    bias = outcome_cor * smd_calc
  )

  if('post_jointVIP' %in% class(object)){
    denom = if(smd=="cross-sample"){cs_denom}else{pooled_denom}
    measures = data.frame(
      outcome_cor = outcome_cor,
      std_md = smd_calc,
      bias = outcome_cor * smd_calc,
      pre_sd = denom
    )
  }
  measures = check_measures(measures)
  return(measures)
}



#' Check measures
#' Check to see if there is any missing values or variables
#' without any variation or identical rows (only unique rows will be used)
#'
#' @param measures measures needed for jointVIP
#' @return measures needed for jointVIP
check_measures = function(measures){

  if(nrow(measures[duplicated(measures) |
                   duplicated(measures, fromLast = TRUE),]) > 0){
    warning(paste0(c("Variables",
                     row.names(measures[duplicated(measures) |
                                          duplicated(measures, fromLast = TRUE),]),
                     "measures are duplicated (all multiples are shown).",
                     "\nOnly unique variables will be used."
    )," "))
  }

  clean_measures <- measures[!duplicated(measures),]

  if(any(rowSums(is.na(clean_measures))>0)){
    warning(paste0(c("Variable(s)",
                     rownames(clean_measures)[rowSums(is.na(clean_measures))>0],
                     "contain missing values.",
                     "\nThey are dropped when plotting."
    )," "))
  }

  if(any(clean_measures$std_md == 0)){
    warning(paste0(c("The standardized mean difference for variable(s)",
                     rownames(clean_measures)[sum(clean_measures$std_md == 0)>0],
                     "are 0.",
                     "\nTheir biases cannot be calculated."
    )," "))
  }

  clean_measures <- clean_measures[complete.cases(clean_measures),]
  if (all(dim(clean_measures) == c(0, 0))) {
    stop("measures is empty, please check for errors that may have occurred")
  }
  clean_measures
}

#' Post-measures data frame to plot post-standardized omitted variable bias
#'
#' @param object post_jointVIP object
#' @param smd calculate standardized mean difference either using `cross-sample` or `pooled`
#' @return measures needed for jointVIP
#' @export
get_post_measures <- function(object, smd = "cross-sample"){
  measures <- get_measures(object, smd = smd)

  treated <- object$post_analysis_df[, object$treatment]
  covariates <- names(object$post_analysis_df)[!(names(object$post_analysis_df)
                                                 %in% c(object$treatment,
                                                        object$outcome))]
  
  w0 = object$wts[treated == 0]
  w1 = object$wts[treated == 1]
  
  post_md <- colSums(object$post_analysis_df[treated == 1,covariates]*w1)/sum(w1) - 
    colSums(object$post_analysis_df[treated == 0,covariates]*w0)/sum(w0)

  post_measures = measures
  post_measures$post_std_md <- post_md/measures$pre_sd
  post_measures$post_bias <- post_measures$post_std_md*post_measures$outcome_cor
  post_measures[, c("outcome_cor",
                    "std_md",
                    "bias",
                    "post_std_md",
                    "post_bias")]
}

#' Calculate bootstrapped variation
#' additional tool to help calculate the uncertainty of each variable's bias
#'
#' @param object jointVIP object
#' @param smd calculate standardized mean difference either using `cross-sample` or `pooled`
#' @param use_abs TRUE (default) for absolute measures
#' @param B 100 (default) for the number of times the bootstrap step wished to run
#' @return bootstrapped measures needed for bootstrap-jointVIP
#' @export
#' @importFrom stats sd var cor complete.cases
get_boot_measures = function(object,
                             smd = "cross-sample",
                             use_abs = TRUE,
                             B = 100) {
  if(!is.numeric(B)){
    stop("B is the number of bootstrap step should run; please input a numeric\nThe ceiling of such number will be used.")
  } else {
    if(B <= 10){
      stop("B is too small please make it a larger number")
    } else if (B >= 990000) {
      stop("B too large, please specify this number to be under 990000")
    }
  }

  B = ceiling(B)

  # original measures
  og = get_measures(object = object, smd = smd)

  # 3d bootstrap array result
  result <- array(0, dim = c(nrow(og),
                             2,
                             B))

  # select from random list of large numbers
  seeds <- paste(sample(1e4:(1e6 - 1), B, replace = F))

  pilot_df = object$pilot_df
  analysis_df = object$analysis_df

  for (b in (1:B)) {
    set.seed(as.numeric(seeds[b]))
    boot_pilot_df = pilot_df[sample(1:nrow(pilot_df),
                                    size = nrow(pilot_df),
                                    replace = T), ]
    boot_analysis_df = analysis_df[sample(1:nrow(analysis_df),
                                          size = nrow(analysis_df),
                                          replace = T), ]
    temp_measure = get_measures(
      object =
        create_jointVIP(
          object$treatment,
          object$outcome,
          names(analysis_df)[!names(analysis_df) %in% c(object$treatment,
                                                        object$outcome)],
          boot_pilot_df,
          boot_analysis_df
        ),
      smd = smd
    )[,c('outcome_cor', 'std_md')]
    result[, , b] = as.matrix(temp_measure)
  }
  dimnames(result) = list(row.names(og),
                          c('outcome_cor',
                            'std_md'),
                          seeds)

  if(use_abs){
    result = abs(result)
  }

  boot_sd = apply(
    result * is.finite(result),
    c(1, 2),
    stats::quantile,
    probs = c(0.025, 0.975),
    na.rm = TRUE
  )

  return(boot_sd)
}

Any scripts or data that you put into this service are public.

jointVIP documentation built on March 31, 2023, 7:39 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

jointVIP
Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

R/measures.R
In jointVIP: Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

Defines functions get_boot_measures get_post_measures check_measures get_measures

Documented in check_measures get_boot_measures get_measures get_post_measures

Try the jointVIP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

jointVIP Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

R/measures.R In jointVIP: Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

Defines functions get_boot_measures get_post_measures check_measures get_measures

Documented in check_measures get_boot_measures get_measures get_post_measures

Try the jointVIP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

jointVIP
Prioritize Variables with Joint Variable Importance Plot in Observational Study Design

R/measures.R
In jointVIP: Prioritize Variables with Joint Variable Importance Plot in Observational Study Design