Legend: Large-Scale Evidence Generation and Evaluation in a Network of Databases

downloadBloodPressureData <- function(connectionDetails,
                                      indicationFolder,
                                      bpFolder,
                                      indicationId,
                                      cdmDatabaseSchema,
                                      cohortDatabaseSchema,
                                      tablePrefix,
                                      oracleTempSchema = NULL) {
    if (!file.exists(bpFolder)) {
        dir.create(bpFolder)
    }

    prepareForDataFetch <- function(conn, indicationFolder, indicationId, cohortDatabaseSchema, tablePrefix, oracleTempSchema) {
        pairedCohortTable <- paste(tablePrefix, tolower(indicationId), "pair_cohort", sep = "_")
        cohortsFolder <- file.path(indicationFolder, "allCohorts")
        exposureSummary <- read.csv(file.path(indicationFolder,
                                              "pairedExposureSummaryFilteredBySize.csv"))
        table <- exposureSummary[, c("targetId", "comparatorId")]
        colnames(table) <- SqlRender::camelCaseToSnakeCase(colnames(table))
        DatabaseConnector::insertTable(connection = conn,
                                       tableName = "#comparisons",
                                       data = table,
                                       dropTableIfExists = TRUE,
                                       createTable = TRUE,
                                       tempTable = TRUE,
                                       oracleTempSchema = oracleTempSchema)

        sql <- SqlRender::loadRenderTranslateSql("UnionExposureCohorts.sql",
                                                 "Legend",
                                                 dbms = connectionDetails$dbms,
                                                 oracleTempSchema = oracleTempSchema,
                                                 cohort_database_schema = cohortDatabaseSchema,
                                                 paired_cohort_table = pairedCohortTable)
        DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE)

        sql <- "TRUNCATE TABLE #comparisons; DROP TABLE #comparisons;"
        sql <- SqlRender::translateSql(sql = sql,
                                       targetDialect = connectionDetails$dbms,
                                       oracleTempSchema = oracleTempSchema)$sql
        DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE)

        # Just to make sure: check of rowIds are consistent with those generated before:
        sql <- "SELECT row_id, subject_id, cohort_start_date FROM #exposure_cohorts"
        sql <- SqlRender::translateSql(sql = sql,
                                       targetDialect = connectionDetails$dbms,
                                       oracleTempSchema = oracleTempSchema)$sql
        newCohorts <- DatabaseConnector::querySql(conn, sql)
        colnames(newCohorts) <- SqlRender::snakeCaseToCamelCase(colnames(newCohorts))
        newCohorts <- newCohorts[order(newCohorts$rowId,
                                       newCohorts$subjectId,
                                       newCohorts$cohortStartDate), ]
        row.names(newCohorts) <- NULL
        allCohorts <- readRDS(file.path(cohortsFolder, "allCohorts.rds"))
        allCohorts <- allCohorts[, colnames(newCohorts)]
        allCohorts <- unique(allCohorts)
        allCohorts <- allCohorts[order(allCohorts$rowId,
                                       allCohorts$subjectId,
                                       allCohorts$cohortStartDate), ]
        row.names(allCohorts) <- NULL
        if (!all.equal(allCohorts, newCohorts)) {
            stop("row IDs have changed. Hot swap failed")
        }
        ParallelLogger::logInfo("Verified that rowIds are the same")
    }

    conn <- DatabaseConnector::connect(connectionDetails)

    prepareForDataFetch(conn, indicationFolder, indicationId, cohortDatabaseSchema, tablePrefix, oracleTempSchema)

    sql <- "SELECT row_id,
    value_as_number,
    measurement_date,
    concept_id,
    concept_name
FROM @cdm_database_schema.measurement
INNER JOIN @cdm_database_schema.concept
ON measurement_concept_id = concept_id
INNER JOIN #exposure_cohorts
ON person_id = subject_id
AND measurement_date <= cohort_start_date
AND measurement_date > DATEADD(DAY, -365, cohort_start_date)
WHERE concept_id IN (3004249, 3012888)"
    sql <- SqlRender::renderSql(sql, cdm_database_schema = cdmDatabaseSchema)$sql
    sql <- SqlRender::translateSql(sql, targetDialect = connectionDetails$dbms)$sql
    bps <- DatabaseConnector::querySql(conn, sql)
    colnames(bps) <- SqlRender::snakeCaseToCamelCase(colnames(bps))
    DatabaseConnector::disconnect(conn)

    bps <- bps[!is.na(bps$valueAsNumber), ]
    bps <- bps[order(bps$rowId, bps$measurementDate, decreasing = TRUE), ]
    bps <- bps[!duplicated(paste(bps$rowId, bps$conceptId)), ]
    saveRDS(bps, file.path(bpFolder, "bps.rds"))
}

plotBalance <- function(row, indicationFolder, bpFolder, analysisId) {
    # row <- tcs[1, ]
    ParallelLogger::logInfo(paste("Plotting balance for", row$targetName, "and", row$comparatorName))
    bpFolder <- file.path(indicationFolder, "bp")
    bps <- readRDS(file.path(bpFolder, "bps.rds"))
    bps <- bps[bps$valueAsNumber < 250, ]
    bps <- bps[bps$valueAsNumber > 25, ]
    # outcomeModelReference <- readRDS(file.path(indicationFolder,
    #                                            "cmOutput",
    #                                            "outcomeModelReference1.rds"))
    outcomeModelReference <- readRDS(file.path(bpFolder, "outcomeModelReference.rds"))
    outcomeModelReference <- outcomeModelReference[outcomeModelReference$targetId == row$targetId &
                                                       outcomeModelReference$comparatorId == row$comparatorId &
                                                       outcomeModelReference$analysisId == analysisId, ]
    outcomeModelReference <- outcomeModelReference[1, ]
    cmData <- CohortMethod::loadCohortMethodData(file.path(indicationFolder,
                                                           "cmOutput",
                                                           outcomeModelReference$cohortMethodDataFolder),
                                                 skipCovariates = FALSE)
    sharedPs <- readRDS(file.path(indicationFolder, "cmOutput", outcomeModelReference$sharedPsFile))
    if (analysisId == 1) {
        strataPop <- CohortMethod::stratifyByPs(sharedPs, numberOfStrata = 10)
    } else if (analysisId == 3) {
        strataPop <- CohortMethod::matchOnPs(sharedPs, caliper = 0.2, caliperScale = "standardized logit", maxRatio = 100)
    } else {
        stop("Unknown analysis ID ", analysisId)
    }
    if (nrow(strataPop) == 0) {
        return(NULL)
    }
    resultRow <- row[, c("targetId", "targetName" ,"comparatorId", "comparatorName")]
    resultRow$targetSubjects <- sum(strataPop$treatment == 1)
    resultRow$comparatorSubjects <- sum(strataPop$treatment == 0)

    # Restricting before cohort to just those in the after cohort:
    # subset <- bps[bps$rowId %in% strataPop$rowId, ]
    subset <- bps[bps$rowId %in% cmData$cohorts$rowId, ]
    strataPop <- strataPop[strataPop$rowId %in% subset$rowId, ]
    cmData$cohorts <- cmData$cohorts[cmData$cohorts$rowId %in% subset$rowId, ]

    resultRow$targetSubjectsWithBp <- sum(strataPop$treatment == 1)
    resultRow$comparatorSubjectsWithBp <- sum(strataPop$treatment == 0)

    # Density plot with custom smoothing:
    m <- merge(subset, strataPop[, c("rowId", "treatment", "stratumId")])
    m$group <- as.character(row$targetName)
    m$group[m$treatment == 0] <- as.character(row$comparatorName)
    m$group <- factor(m$group, levels = c(as.character(row$targetName),
                                          as.character(row$comparatorName)))
    m$conceptName[m$conceptName == "BP diastolic"] <- "Diastolic"
    m$conceptName[m$conceptName == "BP systolic"] <- "Systolic"

    # Save for Marc:
    fileName <- file.path(bpFolder, sprintf("BpData_%s_%s_%s.rds", as.character(row$targetName), as.character(row$comparatorName), analysisId))
    saveRDS(m, fileName)

    # Save for George:
    balance <- CohortMethod::computeCovariateBalance(population = strataPop, cohortMethodData = cmData)
    balanceFile <- file.path(bpFolder, sprintf("Balance_%s_%s.csv", as.character(row$targetName), as.character(row$comparatorName)))
    write.csv(balance, balanceFile, row.names = FALSE)

    # before <- data.frame()
    # after <- data.frame()
    # for (conceptName in c("Diastolic", "Systolic")) {
    #     for (group in c(as.character(row$targetName), as.character(row$comparatorName))) {
    #         d <- density(m$valueAsNumber[m$conceptName == conceptName & m$group == group], bw = 5, from = 50, to = 250, n = 210)
    #         before <- rbind(before,
    #                         data.frame(x = d$x,
    #                                    y = d$y,
    #                                    conceptName = conceptName,
    #                                    group = group))
    #         for (stratumId in unique(m$stratumId)) {
    #             d <- density(m$valueAsNumber[m$conceptName == conceptName & m$group == group & m$stratumId == stratumId], bw = 5, from = 50, to = 250, n = 210)
    #             after <- rbind(after,
    #                            data.frame(x = d$x,
    #                                       y = d$y,
    #                                       conceptName = conceptName,
    #                                       group = group,
    #                                       stratumId = stratumId))
    #         }
    #     }
    # }
    # plotBp <- function(vizData, stratified, fileName) {
    #     plot <- ggplot2::ggplot(vizData, ggplot2::aes(x = x, y = y, group = group, color = group, fill = group)) +
    #         ggplot2::geom_area(position = "identity") +
    #         ggplot2::scale_fill_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
    #         ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
    #         ggplot2::xlab("Blood pressure") +
    #         ggplot2::ylab("Density") +
    #         ggplot2::xlim(c(50,200)) +
    #         ggplot2::theme(legend.position = "top",
    #                        legend.title = ggplot2::element_blank())
    #     if (stratified) {
    #         plot <- plot + ggplot2::facet_grid(conceptName~stratumId)
    #     } else {
    #         plot <- plot + ggplot2::facet_grid(conceptName~.)
    #     }
    #     width <- if (stratified) {11} else {5}
    #     ggplot2::ggsave(fileName, plot, width = width, height = 4, dpi = 400)
    # }
    # plotBp(before, stratified = FALSE, fileName = file.path(bpFolder, sprintf("Before_%s_%s.png", as.character(row$targetName), as.character(row$comparatorName))))
    # plotBp(after, stratified = TRUE, fileName = file.path(bpFolder, sprintf("After_%s_%s.png", as.character(row$targetName), as.character(row$comparatorName))))
    #
    # # Flip T and C:
    # before$group <- factor(before$group, levels = c(as.character(row$comparatorName), as.character(row$targetName)))
    # after$group <- factor(after$group, levels = c(as.character(row$comparatorName), as.character(row$targetName)))
    # plotBp(before, stratified = FALSE, fileName = file.path(bpFolder, sprintf("Before_%s_%s.png", as.character(row$comparatorName), as.character(row$targetName))))
    # plotBp(after, stratified = TRUE, fileName = file.path(bpFolder, sprintf("After_%s_%s.png", as.character(row$comparatorName), as.character(row$targetName))))

    # Compute balance
    cmData$covariates <- ff::as.ffdf(data.frame(rowId = subset$rowId,
                                                covariateId = subset$conceptId,
                                                covariateValue = subset$valueAsNumber))
    subsetRef <- unique(subset[, c("conceptId", "conceptName")])
    cmData$covariateRef <- ff::as.ffdf(data.frame(covariateId = subsetRef$conceptId,
                                                  covariateName = subsetRef$conceptName))
    bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
    resultRow <- merge(resultRow, bal)

    return(resultRow)
}

refitPropensityModel <- function(row, indicationFolder, bpFolder, analysisId) {
    # row <- tcs[1, ]
    ParallelLogger::logInfo(paste("Refitting propensity model for", row$targetName, "and", row$comparatorName))
    bpFolder <- file.path(indicationFolder, "bp")
    bps <- readRDS(file.path(bpFolder, "bps.rds"))
    bps <- bps[bps$valueAsNumber < 250, ]
    bps <- bps[bps$valueAsNumber > 25, ]
    # outcomeModelReference <- readRDS(file.path(indicationFolder,
    #                                            "cmOutput",
    #                                            "outcomeModelReference1.rds"))
    outcomeModelReference <- readRDS(file.path(bpFolder, "outcomeModelReference.rds"))
    outcomeModelReference <- outcomeModelReference[outcomeModelReference$targetId == row$targetId &
                                                       outcomeModelReference$comparatorId == row$comparatorId &
                                                       outcomeModelReference$analysisId == analysisId, ]
    outcomeModelReference <- outcomeModelReference[outcomeModelReference$strataFile != "", ][1, ]

    if (!file.exists(file.path(bpFolder, outcomeModelReference$cohortMethodDataFolder))) {
        # Create new CohortMethodData object, restricting to people with BP data, and adding BP as splines
        cmData <- CohortMethod::loadCohortMethodData(file.path(indicationFolder,
                                                               "cmOutput",
                                                               outcomeModelReference$cohortMethodDataFolder))
        subset <- bps[bps$rowId %in% cmData$cohorts$rowId, ]
        # Convert to splines:
        newCovars <- data.frame()
        newCovarRef <- data.frame()
        for (conceptId in unique(subset$conceptId)) {
            # conceptId <- 3012888
            measurement <- subset[subset$conceptId == conceptId, ]
            designMatrix <- splines::bs(measurement$valueAsNumber, 5)
            sparse <- lapply(1:5, function(x) data.frame(rowId = measurement$rowId,
                                                         covariateId = conceptId * 10 + x,
                                                         covariateValue = designMatrix[, x]))

            sparse <- do.call("rbind", sparse)
            newCovars <- rbind(newCovars, sparse)
            ref <- data.frame(covariateId = conceptId * 10 + 1:5,
                              covariateName = paste(measurement$conceptName[1], 1:5),
                              analysisId = conceptId,
                              conceptId = conceptId)
            newCovarRef <- rbind(newCovarRef, ref)
        }
        covariates <- cmData$covariates
        covariates <- covariates[ffbase::`%in%`(covariates$rowId, subset$rowId), ]
        covariates <- ffbase::ffdfappend(covariates, newCovars)
        covariateRef <- cmData$covariateRef
        covariateRef <- ffbase::ffdfappend(covariateRef, newCovarRef)
        newCmData <- cmData
        newCmData$cohorts <- newCmData$cohorts[newCmData$cohorts$rowId %in% subset$rowId, ]
        attrition <- attr(newCmData$cohorts, "metaData")$attrition
        attrition <- rbind(attrition,
                           data.frame(description = "Having BP data",
                                      targetPersons =  sum(newCmData$cohort$treatment == 1),
                                      comparatorPersons =  sum(newCmData$cohort$treatment == 0),
                                      targetExposures =  sum(newCmData$cohort$treatment == 1),
                                      comparatorExposures =  sum(newCmData$cohort$treatment == 0)))
        attr(newCmData$cohorts, "metaData")$attrition <- attrition
        newCmData$outcomes <- newCmData$outcomes[newCmData$outcomes$rowId %in% subset$rowId, ]
        newCmData$covariates <- covariates
        newCmData$covariateRef <- covariateRef
        CohortMethod::saveCohortMethodData(cohortMethodData = newCmData,
                                           file = file.path(bpFolder,outcomeModelReference$cohortMethodDataFolder),
                                           compress = TRUE)
        rm(cmData)
        cmData <- newCmData
    } else {
        cmData <- CohortMethod::loadCohortMethodData(file.path(bpFolder, outcomeModelReference$cohortMethodDataFolder))
        subset <- bps[bps$rowId %in% cmData$cohorts$rowId, ]
    }

    if (!file.exists(file.path(bpFolder, outcomeModelReference$sharedPsFile))) {
        # Fit propensity model:
        subgroupCovariateIds <- c(1998, 2998, 3998, 4998, 5998, 6998, 7998, 8998)
        studyPop <- CohortMethod::createStudyPopulation(cohortMethodData = cmData,
                                                        removeDuplicateSubjects = "keep first",
                                                        removeSubjectsWithPriorOutcome = TRUE,
                                                        riskWindowStart = 1,
                                                        riskWindowEnd = 0,
                                                        addExposureDaysToEnd = TRUE,
                                                        minDaysAtRisk = 1,
                                                        censorAtNewRiskWindow = TRUE)
        ps <- CohortMethod::createPs(population = studyPop,
                                     cohortMethodData = cmData,
                                     control = Cyclops::createControl(noiseLevel = "quiet",
                                                                      cvType = "auto",
                                                                      tolerance = 2e-07,
                                                                      cvRepetitions = 1,
                                                                      fold = 10,
                                                                      startingVariance = 0.01,
                                                                      seed = 123,
                                                                      threads = 10),
                                     stopOnError = FALSE,
                                     excludeCovariateIds = subgroupCovariateIds,
                                     maxCohortSizeForFitting = 1e+05)
        saveRDS(ps, file.path(bpFolder, outcomeModelReference$sharedPsFile))
    } else {
        ps <- readRDS(file.path(bpFolder, outcomeModelReference$sharedPsFile))
    }
    if (!file.exists(file.path(bpFolder, outcomeModelReference$strataFile))) {
        studyPop <- CohortMethod::createStudyPopulation(population = ps,
                                                        removeDuplicateSubjects = "keep first",
                                                        removeSubjectsWithPriorOutcome = TRUE,
                                                        riskWindowStart = 1,
                                                        riskWindowEnd = 0,
                                                        endAnchor = "cohort end",
                                                        minDaysAtRisk = 1,
                                                        censorAtNewRiskWindow = TRUE)
        if (analysisId == 1) {
            strataPop <- CohortMethod::stratifyByPs(studyPop, numberOfStrata = 10, baseSelection = "all")
        } else if (analysisId == 3) {
            strataPop <- CohortMethod::matchOnPs(studyPop, caliper = 0.2, caliperScale = "standardized logit", maxRatio = 100)
        } else {
            stop("Unknown analysis ID ", analysisId)
        }
        saveRDS(strataPop, file.path(bpFolder, outcomeModelReference$strataFile))
    } else {
        strataPop <- readRDS(file.path(bpFolder, outcomeModelReference$strataFile))
    }
    if (nrow(strataPop) != 0) {
        if (!file.exists(file.path(bpFolder, sprintf("BalanceAfterMatchingUsingBp_%s_%s_%s.png", row$targetName, row$comparatorName, analysisId)))) {
            # Overall balance:
            bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
            CohortMethod::plotCovariateBalanceScatterPlot(bal, fileName = file.path(bpFolder, sprintf("BalanceAfterStrataUsingBp_%s_%s.png", row$targetName, row$comparatorName)))
            CohortMethod::plotCovariateBalanceOfTopVariables(bal, fileName = file.path(bpFolder, sprintf("BalanceTopAfterStrataUsingBp_%s_%s.png", row$targetName, row$comparatorName)))

            balanceFile <- file.path(bpFolder, sprintf("BalanceAdjustBp_%s_%s.csv", as.character(row$targetName), as.character(row$comparatorName)))
            write.csv(bal, balanceFile, row.names = FALSE)
        }

        cmData$covariates <- ff::as.ffdf(data.frame(rowId = subset$rowId,
                                                    covariateId = subset$conceptId,
                                                    covariateValue = subset$valueAsNumber))
        subsetRef <- unique(subset[, c("conceptId", "conceptName")])
        cmData$covariateRef <- ff::as.ffdf(data.frame(covariateId = subsetRef$conceptId,
                                                      covariateName = subsetRef$conceptName))
        bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
        resultRow <- row[, c("targetId", "targetName" ,"comparatorId", "comparatorName")]
        resultRow <- merge(resultRow, bal)
        ff::close.ffdf(cmData$covariates)
        ff::close.ffdf(cmData$covariateRef)
        return(resultRow)
    } else {
        ff::close.ffdf(cmData$covariates)
        ff::close.ffdf(cmData$covariateRef)
        return(NULL)
    }

}

computeAdjustedHrs <- function(row, indicationFolder, bpFolder, indicationId, analysisId) {
    # row <- tcs[1,]
    ParallelLogger::logInfo(paste("Computing adjusted HR for", row$targetName, "and", row$comparatorName))
    bps <- readRDS(file.path(bpFolder, "bps.rds"))
    bps <- bps[bps$valueAsNumber < 250, ]
    bps <- bps[bps$valueAsNumber > 25, ]
    # outcomeModelReference1 <- readRDS(file.path(indicationFolder,
    #                                             "cmOutput",
    #                                             "outcomeModelReference1.rds"))
    # outcomeModelReference2 <- readRDS(file.path(indicationFolder,
    #                                             "cmOutput",
    #                                             "outcomeModelReference2.rds"))
    # outcomeModelReference3 <- readRDS(file.path(indicationFolder,
    #                                             "cmOutput",
    #                                             "outcomeModelReference3.rds"))
    # outcomeModelReference <- rbind(outcomeModelReference1, outcomeModelReference2, outcomeModelReference3)
    outcomeModelReference <- readRDS(file.path(bpFolder, "outcomeModelReference.rds"))
    onTreatment <- outcomeModelReference[outcomeModelReference$targetId == row$targetId &
                                                       outcomeModelReference$comparatorId == row$comparatorId &
                                                       outcomeModelReference$analysisId == analysisId, ]
    analysisFolder <- file.path(bpFolder, sprintf("Analysis_%s", analysisId))
    if (!file.exists(analysisFolder)) {
        dir.create(analysisFolder)
    }
    ps <- readRDS(file.path(bpFolder, onTreatment$sharedPsFile[1]))
    cmData <- CohortMethod::loadCohortMethodData(file.path(bpFolder, onTreatment$cohortMethodDataFolder[1]))


    # onTreatment <- onTreatment[onTreatment$outcomeId %in% outcomesOfInterest$cohortId, ]

    computeHr <- function(i) {
        # i = 1
        omFile <- file.path(bpFolder, onTreatment$outcomeModelFile[i])
        if (!file.exists(omFile)) {
            studyPop <- CohortMethod::createStudyPopulation(population = ps,
                                                            cohortMethodData = cmData,
                                                            outcomeId = onTreatment$outcomeId[i],
                                                            removeDuplicateSubjects = "keep first",
                                                            removeSubjectsWithPriorOutcome = TRUE,
                                                            riskWindowStart = 1,
                                                            riskWindowEnd = 0,
                                                            endAnchor = "cohort end",
                                                            minDaysAtRisk = 1,
                                                            censorAtNewRiskWindow = TRUE)
            # strataPop <- CohortMethod::stratifyByPs(studyPop, numberOfStrata = 10, baseSelection = "all")
            strataPop <- CohortMethod::matchOnPs(studyPop, caliper = 0.2, caliperScale = "standardized logit", maxRatio = 100)
            outcomeModel <- CohortMethod::fitOutcomeModel(population = strataPop,
                                                          stratified = TRUE,
                                                          modelType = "cox")
            saveRDS(outcomeModel, omFile)
        }
    }
    plyr::l_ply(1:nrow(onTreatment), computeHr, .progress = "text")
    originalSummary <- CohortMethod::summarizeAnalyses(onTreatment, file.path(indicationFolder, "cmOutput"))
    originalSummary$type <- "Original"
    bpAdjustedSummary <- CohortMethod::summarizeAnalyses(onTreatment, bpFolder)
    bpAdjustedSummary$type <- "Adjusting for\nblood pressure"
    estimates <- rbind(originalSummary, bpAdjustedSummary)

    # Calibration ------------------------------------------
    pathToCsv <- system.file("settings", "OutcomesOfInterest.csv", package = "Legend")
    outcomesOfInterest <- read.csv(pathToCsv, stringsAsFactors = FALSE)
    outcomesOfInterest <- outcomesOfInterest[outcomesOfInterest$indicationId == indicationId, ]
    pathToCsv <- file.path(indicationFolder, "signalInjectionSummary.csv")
    siSummary <- read.csv(pathToCsv)
    siSummary <- siSummary[siSummary$newOutcomeId %in% estimates$outcomeId, ]
    pcs <- data.frame(outcomeId = siSummary$newOutcomeId, targetEffectSize = siSummary$trueEffectSize)
    pathToCsv <- system.file("settings", "NegativeControls.csv", package = "Legend")
    negativeControls <- read.csv(pathToCsv)
    negativeControls <- negativeControls[negativeControls$indicationId == indicationId, ]

    estimates <- merge(estimates,
                       data.frame(outcomeId = siSummary$newOutcomeId,
                                  targetEffectSize = siSummary$targetEffectSize),
                       all.x = TRUE)
    estimates$targetEffectSize[estimates$outcomeId %in% negativeControls$cohortId] <- 1

    tcEstimates <- estimates[estimates$outcomeId %in% outcomesOfInterest$cohortId |
                                 estimates$outcomeId %in% negativeControls$cohortId |
                                 estimates$outcomeId  %in% siSummary$newOutcomeId[siSummary$exposureId == row$targetId], ]

    ctEstimates <- estimates[estimates$outcomeId %in% outcomesOfInterest$cohortId |
                                 estimates$outcomeId %in% negativeControls$cohortId |
                                 estimates$outcomeId  %in% siSummary$newOutcomeId[siSummary$exposureId == row$comparatorId], ]
    temp <- ctEstimates$targetId
    ctEstimates$targetId <- ctEstimates$comparatorId
    ctEstimates$comparatorId <- temp
    temp <- ctEstimates$target
    ctEstimates$target <- ctEstimates$comparator
    ctEstimates$comparator <- temp
    temp <- ctEstimates$targetDays
    ctEstimates$targetDays <- ctEstimates$comparatorDays
    ctEstimates$comparatorDays <- temp
    temp <- ctEstimates$eventsTarget
    ctEstimates$eventsTarget <- ctEstimates$eventsComparator
    ctEstimates$eventsComparator <- temp
    ctEstimates$logRr <- -ctEstimates$logRr
    ctEstimates$rr <- 1/ctEstimates$r
    temp <- 1/ctEstimates$ci95ub
    ctEstimates$ci95ub <- 1/ctEstimates$ci95lb
    ctEstimates$ci95lb <- temp

    if (all(is.na(tcEstimates$seLogRr))) {
        warning("All estimates are NA. Skipping calibration")
        return()
    }
    calibrate <- function(estimates) {
        controlEstimates <- estimates[!is.na(estimates$targetEffectSize), ]
        controlEstimates <- controlEstimates[!is.na(controlEstimates$seLogRr), ]
        errorModel <- EmpiricalCalibration::fitSystematicErrorModel(logRr = controlEstimates$logRr,
                                                                    seLogRr = controlEstimates$seLogRr,
                                                                    trueLogRr = log(controlEstimates$targetEffectSize),
                                                                    estimateCovarianceMatrix = FALSE)
        # EmpiricalCalibration::plotCiCalibrationEffect(logRr = controlEstimates$logRr,
        #                                               seLogRr = controlEstimates$seLogRr,
        #                                               trueLogRr = log(controlEstimates$targetEffectSize))
        cal <- EmpiricalCalibration::calibrateConfidenceInterval(logRr = estimates$logRr,
                                                                        seLogRr = estimates$seLogRr,
                                                                        model = errorModel)
        calibrated <- estimates
        calibrated$rr <- exp(cal$logRr)
        calibrated$ci95lb <- exp(cal$logLb95Rr)
        calibrated$ci95ub <- exp(cal$logUb95Rr)
        calibrated$logLb95Rr <- NULL
        calibrated$logUb95Rr <- NULL
        calibrated$outcomeId <- estimates$outcomeId
        calibrated$type <- estimates$type
        calibrated$estimate <- "Calibrated"
        estimates$estimate <- "Uncalibrated"
        estimates <- rbind(estimates[, colnames(calibrated)], calibrated)
    }
    estimates <- rbind(calibrate(tcEstimates[tcEstimates$type == "Original", ]),
                       calibrate(tcEstimates[tcEstimates$type == "Adjusting for\nblood pressure", ]),
                       calibrate(ctEstimates[ctEstimates$type == "Original", ]),
                       calibrate(ctEstimates[ctEstimates$type == "Adjusting for\nblood pressure", ]))

    vizData <- merge(estimates, data.frame(outcomeId = outcomesOfInterest$cohortId,
                                           outcomeName = outcomesOfInterest$name))
    vizData <- vizData[!is.na(vizData$seLogRr), ]
    outcomeNames <- unique(vizData$outcomeName)
    outcomeNames <- outcomeNames[order(outcomeNames, decreasing = TRUE)]
    vizData$y <- match(vizData$outcomeName, outcomeNames) - 0.1 + 0.2*(vizData$type == "Original")

    # Save for Marc:
    fileName <- file.path(bpFolder, sprintf("HrsData_%s_%s_%s.rds", row$targetName, row$comparatorName, analysisId))
    saveRDS(vizData, fileName)

    # Save for George:
    fileName <- file.path(bpFolder, sprintf("HrsDataBpAdj_%s_%s_%s.csv", row$targetName, row$comparatorName, analysisId))
    write.csv(estimates, fileName, row.names = FALSE)

    plotHrs <- function(vizData, fileName) {
        breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 8, 10)
        odd <- seq(1, length(outcomeNames), by = 2)
        bars <- data.frame(xmin = 0.01,
                           xmax = 100,
                           ymin = odd - 0.5,
                           ymax = odd + 0.5,
                           type = "Original",
                           rr = 1,
                           y = 1)
        vizData$estimate <- factor(vizData$estimate, levels = c("Uncalibrated", "Calibrated"))
        plot <- ggplot2::ggplot(vizData, ggplot2::aes(x = rr, y = y, color = type, shape = type, xmin = ci95lb, xmax = ci95ub)) +
            ggplot2::geom_rect(ggplot2::aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = "#EEEEEE", colour = "#EEEEEE", size = 0, data = bars) +
            ggplot2::geom_vline(xintercept = breaks, colour = "#AAAAAA", lty = 1, size = 0.25) +
            ggplot2::geom_vline(xintercept = 1, size = 0.5) +
            ggplot2::geom_errorbarh(alpha = 0.65, size = 0.6, height = 0.3) +
            ggplot2::geom_point(alpha = 0.65, size = 2) +
            ggplot2::scale_y_continuous(breaks = 1:length(outcomeNames), labels = outcomeNames) +
            ggplot2::coord_cartesian(xlim = c(0.1, 10)) +
            ggplot2::scale_x_log10(breaks = breaks, labels = breaks) +
            ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.65), rgb(0, 0, 0.8, alpha = 0.65))) +
            ggplot2::xlab("Hazard Ratio") +
            ggplot2::facet_grid(~estimate) +
            ggplot2::theme(axis.title.y = ggplot2::element_blank(),
                           legend.title = ggplot2::element_blank(),
                           panel.grid.minor = ggplot2::element_blank(),
                           panel.background = ggplot2::element_blank(),
                           panel.grid.major = ggplot2::element_blank(),
                           axis.ticks = ggplot2::element_blank(),
                           strip.background = ggplot2::element_blank())
        ggplot2::ggsave(filename = fileName, plot = plot, width = 8, height = 10, dpi = 400)
        # ggplot2::ggsave(filename = fileName, plot = plot, width = 6.5, height = 10, dpi = 400)
    }
    plotHrs(vizData[vizData$targetId == row$targetId, ], file.path(bpFolder, sprintf("Hrs_%s_%s_%s.png", row$targetName, row$comparatorName, analysisId)))

    plotHrs(vizData[vizData$targetId == row$comparatorId, ], file.path(bpFolder, sprintf("Hrs_%s_%s_%s.png", row$comparatorName, row$targetName, analysisId)))
}


computeUnadjustedHrs <- function(row, indicationFolder, bpFolder, indicationId, analysisId) {
    # row <- tcs[1,]
    ParallelLogger::logInfo(paste("Computing unadjusted HR for", row$targetName, "and", row$comparatorName))
    outcomeModelReference <- readRDS(file.path(bpFolder, "outcomeModelReference.rds"))
    onTreatment <- outcomeModelReference[outcomeModelReference$targetId == row$targetId &
                                             outcomeModelReference$comparatorId == row$comparatorId &
                                             outcomeModelReference$analysisId == analysisId, ]
    analysisFolder <- file.path(bpFolder, sprintf("Analysis_%s", analysisId + 4))
    if (!file.exists(analysisFolder)) {
        dir.create(analysisFolder)
    }
    cmData <- CohortMethod::loadCohortMethodData(file.path(indicationFolder, "cmOutput", onTreatment$cohortMethodDataFolder[1]))

    computeHr <- function(i) {
        #i = 1
        omFile <- file.path(bpFolder, gsub(sprintf("Analysis_%s", analysisId), sprintf("Analysis_%s", analysisId + 4), onTreatment$outcomeModelFile[i]))
        if (!file.exists(omFile)) {
            studyPop <- CohortMethod::createStudyPopulation(cohortMethodData = cmData,
                                                            outcomeId = onTreatment$outcomeId[i],
                                                            removeDuplicateSubjects = "keep first",
                                                            removeSubjectsWithPriorOutcome = TRUE,
                                                            riskWindowStart = 1,
                                                            riskWindowEnd = 0,
                                                            endAnchor = "cohort end",
                                                            minDaysAtRisk = 1,
                                                            censorAtNewRiskWindow = TRUE)
            outcomeModel <- CohortMethod::fitOutcomeModel(population = studyPop,
                                                          stratified = FALSE,
                                                          modelType = "cox")
            saveRDS(outcomeModel, omFile)
        }
    }
    plyr::l_ply(1:nrow(onTreatment), computeHr, .progress = "text")
    originalSummary <- CohortMethod::summarizeAnalyses(onTreatment, file.path(indicationFolder, "cmOutput"))
    originalSummary$type <- "Original"
    onTreatment$analysisId <- onTreatment$analysisId + 4
    onTreatment$outcomeModelFile <- gsub(sprintf("Analysis_%s", analysisId), sprintf("Analysis_%s", analysisId + 4), onTreatment$outcomeModelFile)
    unadjustedSummary <- CohortMethod::summarizeAnalyses(onTreatment, bpFolder)
    unadjustedSummary$type <- "Unadjusted"
    estimates <- rbind(originalSummary, unadjustedSummary)

    # Calibration ------------------------------------------
    pathToCsv <- system.file("settings", "OutcomesOfInterest.csv", package = "Legend")
    outcomesOfInterest <- read.csv(pathToCsv, stringsAsFactors = FALSE)
    outcomesOfInterest <- outcomesOfInterest[outcomesOfInterest$indicationId == indicationId, ]
    pathToCsv <- file.path(indicationFolder, "signalInjectionSummary.csv")
    siSummary <- read.csv(pathToCsv)
    siSummary <- siSummary[siSummary$newOutcomeId %in% estimates$outcomeId, ]
    pcs <- data.frame(outcomeId = siSummary$newOutcomeId, targetEffectSize = siSummary$trueEffectSize)
    pathToCsv <- system.file("settings", "NegativeControls.csv", package = "Legend")
    negativeControls <- read.csv(pathToCsv)
    negativeControls <- negativeControls[negativeControls$indicationId == indicationId, ]

    estimates <- merge(estimates,
                       data.frame(outcomeId = siSummary$newOutcomeId,
                                  targetEffectSize = siSummary$targetEffectSize),
                       all.x = TRUE)
    estimates$targetEffectSize[estimates$outcomeId %in% negativeControls$cohortId] <- 1

    tcEstimates <- estimates[estimates$outcomeId %in% outcomesOfInterest$cohortId |
                                 estimates$outcomeId %in% negativeControls$cohortId |
                                 estimates$outcomeId  %in% siSummary$newOutcomeId[siSummary$exposureId == row$targetId], ]

    ctEstimates <- estimates[estimates$outcomeId %in% outcomesOfInterest$cohortId |
                                 estimates$outcomeId %in% negativeControls$cohortId |
                                 estimates$outcomeId  %in% siSummary$newOutcomeId[siSummary$exposureId == row$comparatorId], ]
    temp <- ctEstimates$targetId
    ctEstimates$targetId <- ctEstimates$comparatorId
    ctEstimates$comparatorId <- temp
    temp <- ctEstimates$target
    ctEstimates$target <- ctEstimates$comparator
    ctEstimates$comparator <- temp
    temp <- ctEstimates$targetDays
    ctEstimates$targetDays <- ctEstimates$comparatorDays
    ctEstimates$comparatorDays <- temp
    temp <- ctEstimates$eventsTarget
    ctEstimates$eventsTarget <- ctEstimates$eventsComparator
    ctEstimates$eventsComparator <- temp
    ctEstimates$logRr <- -ctEstimates$logRr
    ctEstimates$rr <- 1/ctEstimates$r
    temp <- 1/ctEstimates$ci95ub
    ctEstimates$ci95ub <- 1/ctEstimates$ci95lb
    ctEstimates$ci95lb <- temp

    if (all(is.na(tcEstimates$seLogRr))) {
        warning("All estimates are NA. Skipping calibration")
        return()
    }
    calibrate <- function(estimates) {
        controlEstimates <- estimates[!is.na(estimates$targetEffectSize), ]
        controlEstimates <- controlEstimates[!is.na(controlEstimates$seLogRr), ]
        errorModel <- EmpiricalCalibration::fitSystematicErrorModel(logRr = controlEstimates$logRr,
                                                                    seLogRr = controlEstimates$seLogRr,
                                                                    trueLogRr = log(controlEstimates$targetEffectSize),
                                                                    estimateCovarianceMatrix = FALSE)
        # EmpiricalCalibration::plotCiCalibrationEffect(logRr = controlEstimates$logRr,
        #                                               seLogRr = controlEstimates$seLogRr,
        #                                               trueLogRr = log(controlEstimates$targetEffectSize))
        cal <- EmpiricalCalibration::calibrateConfidenceInterval(logRr = estimates$logRr,
                                                                 seLogRr = estimates$seLogRr,
                                                                 model = errorModel)
        calibrated <- estimates
        calibrated$rr <- exp(cal$logRr)
        calibrated$ci95lb <- exp(cal$logLb95Rr)
        calibrated$ci95ub <- exp(cal$logUb95Rr)
        calibrated$logLb95Rr <- NULL
        calibrated$logUb95Rr <- NULL
        calibrated$outcomeId <- estimates$outcomeId
        calibrated$type <- estimates$type
        calibrated$estimate <- "Calibrated"
        estimates$estimate <- "Uncalibrated"
        estimates <- rbind(estimates[, colnames(calibrated)], calibrated)
    }
    estimates <- rbind(calibrate(tcEstimates[tcEstimates$type == "Original", ]),
                       calibrate(tcEstimates[tcEstimates$type == "Unadjusted", ]),
                       calibrate(ctEstimates[ctEstimates$type == "Original", ]),
                       calibrate(ctEstimates[ctEstimates$type == "Unadjusted", ]))

    vizData <- merge(estimates, data.frame(outcomeId = outcomesOfInterest$cohortId,
                                           outcomeName = outcomesOfInterest$name))
    vizData <- vizData[!is.na(vizData$seLogRr), ]
    outcomeNames <- unique(vizData$outcomeName)
    outcomeNames <- outcomeNames[order(outcomeNames, decreasing = TRUE)]
    vizData$y <- match(vizData$outcomeName, outcomeNames) - 0.1 + 0.2*(vizData$type == "Original")

    # Save for George:
    fileName <- file.path(bpFolder, sprintf("HrsDataNoAdj_%s_%s_%s.csv", row$targetName, row$comparatorName, analysisId + 4))
    write.csv(estimates, fileName, row.names = FALSE)

    plotHrs <- function(vizData, fileName) {
        breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 8, 10)
        odd <- seq(1, length(outcomeNames), by = 2)
        bars <- data.frame(xmin = 0.01,
                           xmax = 100,
                           ymin = odd - 0.5,
                           ymax = odd + 0.5,
                           type = "Original",
                           rr = 1,
                           y = 1)
        vizData$estimate <- factor(vizData$estimate, levels = c("Uncalibrated", "Calibrated"))
        plot <- ggplot2::ggplot(vizData, ggplot2::aes(x = rr, y = y, color = type, shape = type, xmin = ci95lb, xmax = ci95ub)) +
            ggplot2::geom_rect(ggplot2::aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = "#EEEEEE", colour = "#EEEEEE", size = 0, data = bars) +
            ggplot2::geom_vline(xintercept = breaks, colour = "#AAAAAA", lty = 1, size = 0.25) +
            ggplot2::geom_vline(xintercept = 1, size = 0.5) +
            ggplot2::geom_errorbarh(alpha = 0.65, size = 0.6, height = 0.3) +
            ggplot2::geom_point(alpha = 0.65, size = 2) +
            ggplot2::scale_y_continuous(breaks = 1:length(outcomeNames), labels = outcomeNames) +
            ggplot2::coord_cartesian(xlim = c(0.1, 10)) +
            ggplot2::scale_x_log10(breaks = breaks, labels = breaks) +
            ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.65), rgb(0, 0, 0.8, alpha = 0.65))) +
            ggplot2::xlab("Hazard Ratio") +
            ggplot2::facet_grid(~estimate) +
            ggplot2::theme(axis.title.y = ggplot2::element_blank(),
                           legend.title = ggplot2::element_blank(),
                           panel.grid.minor = ggplot2::element_blank(),
                           panel.background = ggplot2::element_blank(),
                           panel.grid.major = ggplot2::element_blank(),
                           axis.ticks = ggplot2::element_blank(),
                           strip.background = ggplot2::element_blank())
        ggplot2::ggsave(filename = fileName, plot = plot, width = 8, height = 10, dpi = 400)
        # ggplot2::ggsave(filename = fileName, plot = plot, width = 6.5, height = 10, dpi = 400)
    }
    plotHrs(vizData[vizData$targetId == row$targetId, ], file.path(bpFolder, sprintf("Hrs_%s_%s_%s.png", row$targetName, row$comparatorName, analysisId + 4)))

    plotHrs(vizData[vizData$targetId == row$comparatorId, ], file.path(bpFolder, sprintf("Hrs_%s_%s_%s.png", row$comparatorName, row$targetName, analysisId + 4)))
}
OHDSI/Legend documentation built on Dec. 29, 2020, 3:52 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OHDSI/Legend
Large-Scale Evidence Generation and Evaluation in a Network of Databases

extras/BloodPressureFunctions.R
In OHDSI/Legend: Large-Scale Evidence Generation and Evaluation in a Network of Databases

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/Legend Large-Scale Evidence Generation and Evaluation in a Network of Databases

extras/BloodPressureFunctions.R In OHDSI/Legend: Large-Scale Evidence Generation and Evaluation in a Network of Databases

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/Legend
Large-Scale Evidence Generation and Evaluation in a Network of Databases

extras/BloodPressureFunctions.R
In OHDSI/Legend: Large-Scale Evidence Generation and Evaluation in a Network of Databases