# Load packages and set options

options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)
knitr::knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage[table]{xcolor}', x, fixed = TRUE)})
library(dplyr)
library(tidyr)
library(xtable)
library(gridExtra)
library(cowplot)

readRdsCamelCase <- function(file) {
  result <- readRDS(file)
  colnames(result) <- SqlRender::snakeCaseToCamelCase(colnames(result))
  return(result)
}
# Set appropriate paths
studyFolder <- path.expand("~/Dropbox/Documents_Academic/Covid19AlphaBlocker")
databaseIds <- c("SIDIAP", "VA-OMOP", "CUIMC", "IQVIA_OpenClaims", "Optum_DOD", "Optum_EHR_COVID")
mdrrArgRank <- c(4, 2, 1, 5, 6, 3) # OpenClaims, VA, SIDIAP, Optum DOD, Optum EHR,  CUIMC
databaseIdLabels <- c("SIDIAP", "VA", "CUIMC", "OpenClaims", "Optum DOD", "Optum EHR", "Meta-analysis")
names(databaseIdLabels) <- c(databaseIds, "Meta-analysis")
dataDirs <- sapply(databaseIds, function (databaseId) file.path(studyFolder, databaseId))
# Extract and set appropriate IDs.
cohorts <- read.csv(file = system.file("settings", "CohortsToCreate.csv", package = "Covid19SusceptibilityAlphaBlockers")) %>% select(cohortId, shinyName)

alphaBlockerId <- cohorts %>% filter(shinyName == "Alpha-1 blocker") %>% select(cohortId) %>% pull()
fiveAlphaId <- cohorts %>% filter(shinyName == "5ARI / PDE5") %>% select(cohortId) %>% pull()

covidDiagnosisId <- cohorts %>% filter(shinyName == "Diagnosis") %>% select(cohortId) %>% pull()
hospitalizationId <- cohorts %>% filter(shinyName == "Hospitalization") %>% select(cohortId) %>% pull()
intensiveServiceId <- cohorts %>% filter(shinyName == "Intensive services") %>% select(cohortId) %>% pull()

prettyNames <- data.frame(
  cohortId = c(alphaBlockerId, fiveAlphaId),
  cohortName = c("Alpha-1 blocker", "5ARI / PDE5"))

analysisIdToKeep <- c(3, 4, 5, 6)
primaryAnalysisId <- 5 # Full stratified PS

Cohort balance and systematic error diagnostics

plotEstimateVsError <- function(controlResults, calibrated, xLabel = NULL, coverageTextColor = "black") {

  if (calibrated) {
    d <- data.frame(logRr = controlResults$calibratedLogRr,
                    seLogRr = controlResults$calibratedSeLogRr,
                    ci95Lb = controlResults$calibratedCi95Lb,
                    ci95Ub = controlResults$calibratedCi95Ub,
                    trueRr = controlResults$effectSize)
    if (is.null(xLabel)) {
      xLabel <- "Hazard ratio (after calibration)"
    }

  } else {
    d <- data.frame(logRr = controlResults$logRr,
                    seLogRr = controlResults$seLogRr,
                    ci95Lb = controlResults$ci95Lb,
                    ci95Ub = controlResults$ci95Ub,
                    trueRr = controlResults$effectSize)
    if (is.null(xLabel)) {
      xLabel <- "Hazard ratio (before calibration)"
    }
  }
  d <- d[!is.na(d$logRr), ]
  d <- d[!is.na(d$ci95Lb), ]
  d <- d[!is.na(d$ci95Ub), ]
  if (nrow(d) == 0) {
    return(NULL)
  }
  d$Significant <- d$ci95Lb > 1 | d$ci95Ub < 1

  oneRow <- data.frame(nLabel = paste0(formatC(nrow(d), big.mark = ","), " estimates"),
                       meanLabel = paste0(formatC(100 *
                                                    mean(!d$Significant, na.rm = TRUE), digits = 1, format = "f"), "% of CIs includes 1"))

  breaks <- c(0.125, 0.25, 0.5, 1, 2, 4, 8)
  theme <- ggplot2::element_text(colour = "#000000", size = 12)
  themeRA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 1)
  themeLA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 0)

  alpha <- 1 - min(0.95 * (nrow(d)/50000)^0.1, 0.95)
  yUpperLim <- 1
  plot <- ggplot2::ggplot(d, ggplot2::aes(x = logRr, y = seLogRr)) +
    ggplot2::geom_vline(xintercept = log(breaks), colour = "#AAAAAA", lty = 1, size = 0.5) +
    ggplot2::geom_abline(ggplot2::aes(intercept = 0, slope = 1/qnorm(0.025)),
                         colour = rgb(0.8, 0, 0),
                         linetype = "dashed",
                         size = 1,
                         alpha = 0.5) +
    ggplot2::geom_abline(ggplot2::aes(intercept = 0, slope = 1/qnorm(0.975)),
                         colour = rgb(0.8, 0, 0),
                         linetype = "dashed",
                         size = 1,
                         alpha = 0.5) +
    ggplot2::geom_point(size = 2, color = rgb(0, 0, 0, alpha = 0.05), alpha = alpha, shape = 16) +
    ggplot2::geom_hline(yintercept = 0) +
    ggplot2::geom_label(x = log(0.11),
                        y = 0.99,
                        alpha = 1,
                        hjust = "left",
                        ggplot2::aes(label = nLabel),
                        size = 5,
                        data = oneRow) +
    ggplot2::geom_label(x = log(0.11),
                        y = 0.9,
                        alpha = 1,
                        hjust = "left",
                        ggplot2::aes(label = meanLabel),
                        colour = coverageTextColor,
                        size = 5,
                        data = oneRow) +
    ggplot2::scale_x_continuous(xLabel, limits = log(c(1 / 8, 8)), breaks = log(breaks), labels = breaks) +
    ggplot2::scale_y_continuous("Standard Error", limits = c(0, yUpperLim)) +
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   panel.background = ggplot2::element_blank(),
                   panel.grid.major = ggplot2::element_blank(),
                   axis.ticks = ggplot2::element_blank(),
                   axis.text.y = themeRA,
                   axis.text.x = theme,
                   axis.title = theme,
                   legend.key = ggplot2::element_blank(),
                   strip.text.x = theme,
                   strip.background = ggplot2::element_blank(),
                   aspect.ratio = 1 / yUpperLim / qnorm(0.975),
                   plot.margin=grid::unit(c(0,0,0,1), "pt"),
                   legend.position = "none")
  return(plot)
}

getAllControls <- function() {
  pathToCsv <- system.file("settings", "NegativeControls.csv", package = "Covid19SusceptibilityAlphaBlockers")
  allControls <- read.csv(pathToCsv)
  allControls$oldOutcomeId <- allControls$outcomeId
  allControls$targetEffectSize <- rep(1, nrow(allControls))
  return(allControls)
}

getControlResults <- function(tId, cId, aId, dId, negativeControlOutcomeIds, results) {
  results <- results %>% filter(targetId == tId, comparatorId == cId, 
                                analysisId == aId, databaseId == dId, 
                                outcomeId %in% negativeControlOutcomeIds) %>%
    mutate(effectSize = 1)
  return(results)
}

negativeControlOutcomeIds <- unique(getAllControls()$outcomeId)
tableau_color <- list(
  blue = '#4E79A7',
  orange = '#F28E2B',
  red = '#E15759',
  light_teal = '#76B7B2',
  green = '#59A14F',
  yellow = '#EDC948',
  purple = '#B07AA1',
  pink = '#FF9DA7',
  brown = '#9C755F',
  light_gray = '#BAB0AC'
)

plotBeforeAndAfterCalibration <- function(
    controlResults, xLabel = "Hazard Ratios", textScale = 1,
    color = list(calibrated = tableau_color$blue, uncalibrated = tableau_color$orange)
  ) {
  calibratedData <- data.frame(logRr = controlResults$calibratedLogRr,
                               seLogRr = controlResults$calibratedSeLogRr,
                               ci95Lb = controlResults$calibratedCi95Lb,
                               ci95Ub = controlResults$calibratedCi95Ub,
                               trueRr = controlResults$effectSize)
  uncalibratedData <- data.frame(logRr = controlResults$logRr,
                                 seLogRr = controlResults$seLogRr,
                                 ci95Lb = controlResults$ci95Lb,
                                 ci95Ub = controlResults$ci95Ub,
                                 trueRr = controlResults$effectSize)
  calibratedData <- calibratedData[!is.na(calibratedData$logRr), ]
  calibratedData <- calibratedData[!is.na(calibratedData$ci95Lb), ]
  calibratedData <- calibratedData[!is.na(calibratedData$ci95Ub), ]
  calibratedData$Significant <- calibratedData$ci95Lb > 1 | calibratedData$ci95Ub < 1
  uncalibratedData <- uncalibratedData[!is.na(uncalibratedData$logRr), ]
  uncalibratedData <- uncalibratedData[!is.na(uncalibratedData$ci95Lb), ]
  uncalibratedData <- uncalibratedData[!is.na(uncalibratedData$ci95Ub), ]
  uncalibratedData$Significant <- uncalibratedData$ci95Lb > 1 | uncalibratedData$ci95Ub < 1
  oneRow <- data.frame(
    nLabel = paste0(formatC(nrow(uncalibratedData), big.mark = ","), " estimates"),
    uncalibratedMeanLabel = paste0(formatC(100 * mean(!uncalibratedData$Significant, na.rm = TRUE), digits = 1, format = "f"), "% of CIs includes 1"),
    calibratedMeanLabel = paste0(formatC(100 * mean(!calibratedData$Significant, na.rm = TRUE), digits = 1, format = "f"), "% of CIs includes 1")
  )

  breaks <- c(0.125, 0.25, 0.5, 1, 2, 4, 8)
  theme <- ggplot2::element_text(colour = "#000000", size = textScale * 12)
  themeRA <- ggplot2::element_text(colour = "#000000", size = textScale * 12, hjust = 1)
  themeLA <- ggplot2::element_text(colour = "#000000", size = textScale * 12, hjust = 0)

  alpha <- 1 - min(0.95 * (nrow(calibratedData)/50000)^0.1, 0.95)
  yUpperLim <- 1
  plot <- ggplot2::ggplot(calibratedData, ggplot2::aes(x = logRr, y = seLogRr)) +
    ggplot2::geom_vline(xintercept = log(breaks), colour = "#AAAAAA", lty = 1, size = 0.5) +
    ggplot2::geom_abline(ggplot2::aes(intercept = 0, slope = 1/qnorm(0.025)),
                         colour = rgb(0.8, 0, 0),
                         linetype = "dashed",
                         size = 1,
                         alpha = 0.5) +
    ggplot2::geom_abline(ggplot2::aes(intercept = 0, slope = 1/qnorm(0.975)),
                         colour = rgb(0.8, 0, 0),
                         linetype = "dashed",
                         size = 1,
                         alpha = 0.5) +
    ggplot2::geom_point(size = 2, color = adjustcolor(color$calibrated, alpha.f = .05), alpha = alpha, shape = 16) +
    ggplot2::geom_point(size = 2, color = adjustcolor(color$uncalibrated, alpha.f = .05), alpha = alpha, shape = 16, data = uncalibratedData) +
    ggplot2::geom_hline(yintercept = 0) +
    ggplot2::geom_label(x = log(0.11),
                        y = 0.99,
                        alpha = 1,
                        hjust = "left",
                        ggplot2::aes(label = nLabel),
                        size = textScale * 4.75,
                        data = oneRow) +
    ggplot2::geom_label(x = log(0.11),
                        y = 0.99 - textScale * 0.09,
                        alpha = 1,
                        hjust = "left",
                        ggplot2::aes(label = uncalibratedMeanLabel),
                        colour = color$uncalibrated,
                        size = textScale * 4.75,
                        data = oneRow) +
    ggplot2::geom_label(x = log(0.11),
                        y = 0.99 - 2 * textScale * 0.09,
                        alpha = 1,
                        hjust = "left",
                        ggplot2::aes(label = calibratedMeanLabel),
                        colour = color$calibrated,
                        size = textScale * 4.75,
                        data = oneRow) +
    ggplot2::scale_x_continuous(xLabel, limits = log(c(1 / 8, 8)), breaks = log(breaks), labels = breaks) +
    ggplot2::scale_y_continuous("Standard Error", limits = c(0, yUpperLim)) +
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   panel.background = ggplot2::element_blank(),
                   panel.grid.major = ggplot2::element_blank(),
                   axis.ticks = ggplot2::element_blank(),
                   axis.text.y = themeRA,
                   axis.text.x = theme,
                   axis.title = theme,
                   legend.key = ggplot2::element_blank(),
                   strip.text.x = theme,
                   strip.background = ggplot2::element_blank(),
                   aspect.ratio = 1 / yUpperLim / qnorm(0.975),
                   plot.margin=grid::unit(c(0,0,0,1), "pt"),
                   legend.position = "none")
  return(plot)
}

Before and after calibration plot

plotCalibrationWrapper <- function(databaseId, analysisId, textScale = 1) {
  covariate <- readRDS(file.path(
    studyFolder, databaseId, "shinyData", paste0("covariate_", databaseId, ".rds")
  ))
  colnames(covariate) <- SqlRender::snakeCaseToCamelCase(colnames(covariate))

  cmResults <- readRDS(file.path(studyFolder, databaseId, "shinyData", paste0("cohort_method_result_", databaseId, ".rds")))
  colnames(cmResults) <- SqlRender::snakeCaseToCamelCase(colnames(cmResults))

  controlResults <- getControlResults(
    alphaBlockerId, fiveAlphaId, analysisId, databaseId, negativeControlOutcomeIds, cmResults
  )
  plot <- plotBeforeAndAfterCalibration(controlResults, textScale = textScale)
  return(plot)
}

analysisId <- 5
calibrationPlotList <- lapply(
  databaseIds[mdrrArgRank],
  function (databaseId) plotCalibrationWrapper(databaseId, analysisId)
)

labelPlotList <- lapply(
  databaseIds[mdrrArgRank],
  function (databaseId) {
    label <- databaseIdLabels[[databaseId]]
    labelCharWidth <- 18
    nSpace <- labelCharWidth - nchar(label)
    nSpaceTail <- floor(nSpace / 2)
    label <- paste0(label, paste0(rep(" ", nSpaceTail), collapse = ""))
    ggplot2::ggplot() + 
      ggplot2::annotate("text", x = 0, y = 0, size = 5.5, angle = 0, label = label) +
      ggplot2::theme_void()
  }
)

# Add database name.
plotList <- lapply(
  1:length(databaseIds), 
  function (i) list(labelPlotList[[i]], calibrationPlotList[[i]])
)
plotList <- do.call(c, plotList)

# Insert empty space between the columns
n_plot <- 2 * length(databaseIds)
plotList <- c(
  plotList[1:(n_plot / 2)], 
  rep(list(NULL), n_plot / 2), 
  plotList[(n_plot / 2 + 1):n_plot]
)

plot <- cowplot::plot_grid(
  plotlist = plotList, ncol = 3, byrow = FALSE,
  rel_widths = c(1, .05, 1), rel_heights = rep(c(.075, 1), length(databaseIds) / 2)
)
analysisType <- c("unadjusted", "adjusted", "min-stratified", "min-matched", "stratified", "matched")[analysisId]
cowplot::save_plot(
  sprintf(
    "before_and_after_calibration_plot_for_%s_analysis.png", 
    analysisType
  ),
  plot, base_asp = 1, base_height = 2 * length(databaseIds),
  units = "in", dpi = 300
)

Before and after plot per larger and smaller databases

largerDataIndex <- c(4, 2, 5) # OpenClaims, VA, Optum DOD,
smallerDataIndex <- c(6, 1, 3) # Optum EHR, SIDIAP, CUIMC
subsetFlag <- c('larger', 'smaller')[1]
if (subsetFlag == 'larger') {
  subsetDataIndex <- largerDataIndex
} else {
  subsetDataIndex <- smallerDataIndex
}
subsetDatabaseIds <- databaseIds[subsetDataIndex]

analysisId <- 1
calibrationPlotList <- lapply(
  subsetDatabaseIds,
  function (databaseId) plotCalibrationWrapper(databaseId, analysisId, textScale = 1.2)
)

labelPlotList <- lapply(
  subsetDatabaseIds,
  function (databaseId) {
    label <- databaseIdLabels[[databaseId]]
    labelCharWidth <- 18
    nSpace <- labelCharWidth - nchar(label)
    nSpaceTail <- floor(nSpace / 2)
    label <- paste0(label, paste0(rep(" ", nSpaceTail), collapse = ""))
    ggplot2::ggplot() + 
      ggplot2::annotate("text", x = 0, y = 0, size = 6, angle = -90, label = label) +
      ggplot2::theme_void()
  }
)

# Add database name.
plotList <- lapply(
  1:length(subsetDatabaseIds), 
  function (i) {
    plotWithLabel <- list(calibrationPlotList[[i]], labelPlotList[[i]])
    if (i != length(subsetDatabaseIds)) {
      plotWithLabel <- c(plotWithLabel, rep(list(NULL), 2))
    }
    return(plotWithLabel)
  }
)
plotList <- do.call(c, plotList)

# Add analysis name
if (analysisId == 1) {
  analysisLabel <- "With no covariate adjustments"
} else if (analysisId == 5) {
  analysisLabel <- "With PS stratification"
}
labelCharWidth <- 54
nSpace <- labelCharWidth - nchar(analysisLabel)
nSpaceHead <- floor(nSpace / 2)
analysisLabel <- paste0(
  paste0(rep(" ", nSpaceHead), collapse = ""),
  analysisLabel
)
analysisLabelPlot <-
  ggplot2::ggplot() + 
  ggplot2::annotate("text", x = 0, y = 0, size = 7.5, angle = 0, label = analysisLabel) +
  ggplot2::theme_void()
plotList <- c(list(analysisLabelPlot, NULL), plotList)

plot <- cowplot::plot_grid(
  plotlist = plotList, ncol = 2, 
  rel_widths = c(1, .05), 
  rel_heights = c(0.15, 1, .05, 1, 0.05, 1)
)

analysisType <- c("unadjusted", "adjusted", "min-stratified", "min-matched", "stratified", "matched")[analysisId]
cowplot::save_plot(
  sprintf(
    "before_and_after_calibration_plot_for_%s_analysis_on_%s_databases.png", 
    analysisType, subsetFlag
  ),
  plot, base_asp = .7, base_height = 2 * length(databaseIds),
  units = "in", dpi = 300
)

\clearpage

VA-OMOP

vaCovariate <- readRDS(file.path(studyFolder, "VA-OMOP", "shinyData", paste0("covariate_", "VA-OMOP", ".rds")))
colnames(vaCovariate) <- SqlRender::snakeCaseToCamelCase(colnames(vaCovariate))

vaResults <- readRDS(file.path(studyFolder, "VA-OMOP", "shinyData", paste0("cohort_method_result_", "VA-OMOP", ".rds")))
colnames(vaResults) <- SqlRender::snakeCaseToCamelCase(colnames(vaResults))


controlResults <- getControlResults(
  alphaBlockerId, fiveAlphaId, primaryAnalysisId, "VA-OMOP", negativeControlOutcomeIds, vaResults
)
plotEstimateVsError(controlResults, calibrated = FALSE, xLabel = "Hazard ratio")
ggplot2::ggsave("negative_control_uncalibrated_full_strat_estimates_and_std_errors_for_va.png")

minimalAnalysisId <- 4 # age, gender, month, and year
controlResults <- getControlResults(
  alphaBlockerId, fiveAlphaId, minimalAnalysisId, "VA-OMOP", negativeControlOutcomeIds, vaResults
)
plotEstimateVsError(controlResults, calibrated = FALSE, xLabel = "Hazard ratio", coverageTextColor = "#CF1020")
ggplot2::ggsave("negative_control_uncalibrated_min_strat_estimates_and_std_errors_for_va.png")


ohdsi-studies/Covid19SusceptibilityAlphaBlockers documentation built on Dec. 25, 2021, 4:45 p.m.