# Copyright 2018 Observational Health Data Sciences and Informatics
#
# This file is part of Legend
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code used in databases that have blood pressure (BP) data to see if we achieved balance on BP.
# Assumes the database connection details and output folder etc. have been set.
# Run this first: Fetch BP values from server --------------------------------------------
indicationFolder <- file.path(outputFolder, indicationId)
bpFolder <- file.path(indicationFolder, "bp")
if (!file.exists(bpFolder)) {
dir.create(bpFolder)
}
prepareForDataFetch <- function(conn, indicationFolder) {
pairedCohortTable <- paste(tablePrefix, tolower(indicationId), "pair_cohort", sep = "_")
cohortsFolder <- file.path(indicationFolder, "allCohorts")
exposureSummary <- read.csv(file.path(indicationFolder,
"pairedExposureSummaryFilteredBySize.csv"))
table <- exposureSummary[, c("targetId", "comparatorId")]
colnames(table) <- SqlRender::camelCaseToSnakeCase(colnames(table))
DatabaseConnector::insertTable(connection = conn,
tableName = "#comparisons",
data = table,
dropTableIfExists = TRUE,
createTable = TRUE,
tempTable = TRUE,
oracleTempSchema = oracleTempSchema)
sql <- SqlRender::loadRenderTranslateSql("UnionExposureCohorts.sql",
"Legend",
dbms = connectionDetails$dbms,
oracleTempSchema = oracleTempSchema,
cohort_database_schema = cohortDatabaseSchema,
paired_cohort_table = pairedCohortTable)
DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE)
sql <- "TRUNCATE TABLE #comparisons; DROP TABLE #comparisons;"
sql <- SqlRender::translateSql(sql = sql,
targetDialect = connectionDetails$dbms,
oracleTempSchema = oracleTempSchema)$sql
DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE)
# Just to make sure: check of rowIds are consistent with those generated before:
sql <- "SELECT row_id, subject_id, cohort_start_date FROM #exposure_cohorts"
sql <- SqlRender::translateSql(sql = sql,
targetDialect = connectionDetails$dbms,
oracleTempSchema = oracleTempSchema)$sql
newCohorts <- DatabaseConnector::querySql(conn, sql)
colnames(newCohorts) <- SqlRender::snakeCaseToCamelCase(colnames(newCohorts))
newCohorts <- newCohorts[order(newCohorts$rowId,
newCohorts$subjectId,
newCohorts$cohortStartDate), ]
row.names(newCohorts) <- NULL
allCohorts <- readRDS(file.path(cohortsFolder, "allCohorts.rds"))
allCohorts <- allCohorts[, colnames(newCohorts)]
allCohorts <- unique(allCohorts)
allCohorts <- allCohorts[order(allCohorts$rowId,
allCohorts$subjectId,
allCohorts$cohortStartDate), ]
row.names(allCohorts) <- NULL
if (!all.equal(allCohorts, newCohorts)) {
stop("row IDs have changed. Hot swap failed")
}
ParallelLogger::logInfo("Verified that rowIds are the same")
}
conn <- DatabaseConnector::connect(connectionDetails)
sql <- "SELECT measurement_type_concept_id, COUNT(*)
FROM @cdm_database_schema.measurement
WHERE measurement_concept_id IN (3004249, 3012888)
GROUP BY measurement_type_concept_id;"
sql <- SqlRender::renderSql(sql, cdm_database_schema = cdmDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = connectionDetails$dbms)$sql
types <- DatabaseConnector::querySql(conn, sql)
querySql(conn, "SELECT * FROM cdm_optum_panther_v735.dbo.concept WHERE concept_id = 45754907")
querySql(conn, "SELECT TOP 10 * FROM cdm_optum_panther_v735.dbo.measurement WHERE measurement_concept_id IN (3004249, 3012888)")
sql <- "SELECT MEASUREMENT_SOURCE_VALUE, COUNT(*)
FROM @cdm_database_schema.measurement
WHERE measurement_concept_id IN (3004249, 3012888)
GROUP BY MEASUREMENT_SOURCE_VALUE;"
sql <- SqlRender::renderSql(sql, cdm_database_schema = cdmDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = connectionDetails$dbms)$sql
types <- DatabaseConnector::querySql(conn, sql)
prepareForDataFetch(conn, indicationFolder)
sql <- "SELECT row_id,
value_as_number,
measurement_date,
concept_id,
concept_name
FROM @cdm_database_schema.measurement
INNER JOIN @cdm_database_schema.concept
ON measurement_concept_id = concept_id
INNER JOIN #exposure_cohorts
ON person_id = subject_id
AND measurement_date <= cohort_start_date
AND measurement_date > DATEADD(DAY, -365, cohort_start_date)
WHERE concept_id IN (3004249, 3012888)"
sql <- SqlRender::renderSql(sql, cdm_database_schema = cdmDatabaseSchema)$sql
sql <- SqlRender::translateSql(sql, targetDialect = connectionDetails$dbms)$sql
bps <- DatabaseConnector::querySql(conn, sql)
colnames(bps) <- SqlRender::snakeCaseToCamelCase(colnames(bps))
DatabaseConnector::disconnect(conn)
bps <- bps[!is.na(bps$valueAsNumber), ]
bps <- bps[order(bps$rowId, bps$measurementDate, decreasing = TRUE), ]
bps <- bps[!duplicated(paste(bps$rowId, bps$conceptId)), ]
saveRDS(bps, file.path(bpFolder, "bps.rds"))
# Run this next: Compute balance per TC --------------------------------------------------------
indicationFolder <- file.path(outputFolder, indicationId)
bpFolder <- file.path(indicationFolder, "bp")
bps <- readRDS(file.path(bpFolder, "bps.rds"))
bps <- bps[bps$valueAsNumber < 250, ]
bps <- bps[bps$valueAsNumber > 25, ]
outcomeModelReference <- readRDS(file.path(indicationFolder,
"cmOutput",
"outcomeModelReference1.rds"))
outcomeModelReference <- outcomeModelReference[outcomeModelReference$analysisId %in% c(1,3), ]
outcomeModelReference <- outcomeModelReference[order(outcomeModelReference$cohortMethodDataFolder, outcomeModelReference$outcomeId), ]
outcomeModelReference <- outcomeModelReference[!duplicated(paste(outcomeModelReference$targetId, outcomeModelReference$comparatorId, outcomeModelReference$analysisId)), ]
exposureSummary <- read.csv(file.path(indicationFolder, "pairedExposureSummaryFilteredBySize.csv"))
outcomeModelReference <- merge(outcomeModelReference, exposureSummary[, c("targetId", "comparatorId", "targetName", "comparatorName")])
rows <- split(outcomeModelReference, 1:nrow(outcomeModelReference))
# For now: only pick one row:
rowIndex <- which(outcomeModelReference$targetName == "Hydrochlorothiazide" & outcomeModelReference$comparatorName == "Chlorthalidone")
rows <- list(rows[[rowIndex]])
compBal <- function(row, indicationFolder) {
#row <- rows[[1]]
cmData <- CohortMethod::loadCohortMethodData(file.path(indicationFolder,
"cmOutput",
row$cohortMethodDataFolder),
skipCovariates = TRUE)
sharedPs <- readRDS(file.path(indicationFolder, "cmOutput", row$sharedPsFile))
strataPop <- CohortMethod::stratifyByPs(sharedPs, numberOfStrata = 10)
# strataPop <- readRDS(file.path(indicationFolder, "cmOutput", row$strataFile))
# Flip T and C if needed:
strataPop$treatment <- 1 - strataPop$treatment
cmData$cohorts$treatment <- 1-cmData$cohorts$treatment
temp <- row$targetName
row$targetName <- row$comparatorName
row$comparatorName <- temp
# Try variable ratio matching:
# strataPop <- CohortMethod::matchOnPs(strataPop, maxRatio = 100)
# Conclusion: does not fix imbalance
if (nrow(strataPop) == 0) {
return(NULL)
}
writeLines(paste0("Number of subjects starting ", row$targetName,": ", sum(strataPop$treatment == 1)))
writeLines(paste0("Number of subjects starting ", row$comparatorName,": ", sum(strataPop$treatment == 0)))
# Restricting before cohort to just those in the after cohort:
subset <- bps[bps$rowId %in% strataPop$rowId, ]
strataPop <- strataPop[strataPop$rowId %in% subset$rowId, ]
cmData$cohorts <- cmData$cohorts[cmData$cohorts$rowId %in% subset$rowId, ]
writeLines("After restricting to those with blood pressure measurements:")
writeLines(paste0("Number of subjects starting ", row$targetName,": ", sum(strataPop$treatment == 1)))
writeLines(paste0("Number of subjects starting ", row$comparatorName,": ", sum(strataPop$treatment == 0)))
# Try stratifying by BP deciles in addition to PS:
# cmData$covariates <- ff::as.ffdf(data.frame(rowId = subset$rowId,
# covariateId = subset$conceptId,
# covariateValue = round(subset$valueAsNumber/10)))
# subsetRef <- unique(subset[, c("conceptId", "conceptName")])
# cmData$covariateRef <- ff::as.ffdf(data.frame(covariateId = subsetRef$conceptId,
# covariateName = subsetRef$conceptName))
# strataPop <- CohortMethod::stratifyByPsAndCovariates(strataPop, numberOfStrata = 10, cohortMethodData = cmData, covariateIds = c(3012888, 3004249))
# strataSizes <- aggregate(rowId ~ stratumId, strataPop, length)
# Conclusion: achieves perfect balance, but removes a lot of people (in strata with size 1)
# Create histogram:
m <- merge(subset, strataPop[, c("rowId", "treatment", "stratumId")])
m$group <- as.character(row$targetName)
m$group[m$treatment == 0] <- as.character(row$comparatorName)
m$group <- factor(m$group, levels = c(as.character(row$targetName),
as.character(row$comparatorName)))
m$conceptName[m$conceptName == "BP diastolic"] <- "Diastolic"
m$conceptName[m$conceptName == "BP systolic"] <- "Systolic"
plot <- ggplot2::ggplot(m, ggplot2::aes(x = valueAsNumber, group = group, color = group, fill = group)) +
ggplot2::geom_histogram(position = "identity", binwidth = 10) +
ggplot2::facet_grid(conceptName~.) +
ggplot2::scale_fill_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::xlab("Blood pressure") +
ggplot2::ylab("Count") +
ggplot2::xlim(c(50,200)) +
ggplot2::theme(legend.position = "top",
legend.title = ggplot2::element_blank())
before <- plot + ggplot2::facet_grid(conceptName~.)
after <- plot + ggplot2::facet_grid(conceptName~stratumId)
ggplot2::ggsave(file.path(outputFolder, sprintf("Before_%s_%s_histogram.png", as.character(row$targetName), as.character(row$comparatorName))), before, width = 5, height = 4, dpi = 300)
ggplot2::ggsave(file.path(outputFolder, sprintf("After_%s_%s_histogram.png", as.character(row$targetName), as.character(row$comparatorName))), after, width = 11, height = 5, dpi = 300)
# Density plot with custom smoothing:
before <- data.frame()
after <- data.frame()
for (conceptName in c("Diastolic", "Systolic")) {
for (group in c(as.character(row$targetName), as.character(row$comparatorName))) {
d <- density(m$valueAsNumber[m$conceptName == conceptName & m$group == group], bw = 5, from = 50, to = 250, n = 210)
before <- rbind(before,
data.frame(x = d$x,
y = d$y,
conceptName = conceptName,
group = group))
for (stratumId in unique(m$stratumId)) {
d <- density(m$valueAsNumber[m$conceptName == conceptName & m$group == group & m$stratumId == stratumId], bw = 5, from = 50, to = 250, n = 210)
after <- rbind(after,
data.frame(x = d$x,
y = d$y,
conceptName = conceptName,
group = group,
stratumId = stratumId))
}
}
}
plot <- ggplot2::ggplot(before, ggplot2::aes(x = x, y = y, group = group, color = group, fill = group)) +
ggplot2::geom_area(position = "identity") +
ggplot2::scale_fill_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::facet_grid(conceptName~.) +
ggplot2::xlab("Blood pressure") +
ggplot2::ylab("Density") +
ggplot2::xlim(c(50,200)) +
ggplot2::theme(legend.position = "top",
legend.title = ggplot2::element_blank())
ggplot2::ggsave(file.path(outputFolder, sprintf("Before_%s_%s_density.png", as.character(row$targetName), as.character(row$comparatorName))), plot, width = 5, height = 4, dpi = 300)
plot <- ggplot2::ggplot(after, ggplot2::aes(x = x, y = y, group = group, color = group, fill = group)) +
ggplot2::geom_area(position = "identity") +
ggplot2::scale_fill_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5), rgb(0, 0, 0.8, alpha = 0.5))) +
ggplot2::facet_grid(conceptName~stratumId) +
ggplot2::xlab("Blood pressure") +
ggplot2::ylab("Density") +
ggplot2::xlim(c(50,200)) +
ggplot2::theme(legend.position = "top",
legend.title = ggplot2::element_blank())
ggplot2::ggsave(file.path(outputFolder, sprintf("After_%s_%s_density.png", as.character(row$targetName), as.character(row$comparatorName))), plot, width = 11, height = 5, dpi = 300)
# Use RItools to see if balance results consistent:
# library(RItools)
# m <- merge(data.frame(rowId = subset$rowId[subset$conceptName == "BP diastolic"],
# dbp = subset$valueAsNumber[subset$conceptName == "BP diastolic"]),
# data.frame(rowId = subset$rowId[subset$conceptName == "BP systolic"],
# sbp = subset$valueAsNumber[subset$conceptName == "BP systolic"]))
# m <- merge(m, strataPop[, c("rowId", "treatment", "stratumId")])
# xBalance(treatment ~ sbp + dbp, strata = factor(m$stratumId), data = m, report = c("all"))
# xBalance(treatment ~ sbp + dbp, data = m, report = c("all"))
# Compute balance
cmData$covariates <- ff::as.ffdf(data.frame(rowId = subset$rowId,
covariateId = subset$conceptId,
covariateValue = subset$valueAsNumber))
subsetRef <- unique(subset[, c("conceptId", "conceptName")])
cmData$covariateRef <- ff::as.ffdf(data.frame(covariateId = subsetRef$conceptId,
covariateName = subsetRef$conceptName))
bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
bal$targetId <- row$targetId
bal$targetName <- row$targetName
bal$comparatorId <- row$comparatorId
bal$comparatorName <- row$comparatorName
bal$analysisId <- row$analysisId
allBalFile <- file.path(indicationFolder,
"balance",
sprintf("Bal_t%s_c%s_a2.rds", row$targetId, row$comparatorId))
allBal <- readRDS(allBalFile)
bal$maxStdDiffOther <- max(allBal$afterMatchingStdDiff, na.rm = TRUE)
bal$minStdDiffOther <- min(allBal$afterMatchingStdDiff, na.rm = TRUE)
return(bal)
}
# cluster <- ParallelLogger::makeCluster(5)
# rows <- split(outcomeModelReference, 1:nrow(outcomeModelReference))
# bal <- ParallelLogger::clusterApply(cluster,
# rows,
# compBal,
# indicationFolder = indicationFolder,
# bps = bps)
# ParallelLogger::stopCluster(cluster)
bal <- plyr::llply(rows,
compBal,
indicationFolder = indicationFolder)
bal <- do.call("rbind", bal)
write.csv(bal, file.path(bpFolder, "balance.csv"), row.names = FALSE)
# Test refitting PS model using BP variables as splines -----------------------------------------------
indicationFolder <- file.path(outputFolder, indicationId)
bpFolder <- file.path(indicationFolder, "bp")
bps <- readRDS(file.path(bpFolder, "bps.rds"))
bps <- bps[bps$valueAsNumber < 250, ]
bps <- bps[bps$valueAsNumber > 25, ]
outcomeModelReference1 <- readRDS(file.path(indicationFolder,
"cmOutput",
"outcomeModelReference1.rds"))
outcomeModelReference2 <- readRDS(file.path(indicationFolder,
"cmOutput",
"outcomeModelReference2.rds"))
outcomeModelReference3 <- readRDS(file.path(indicationFolder,
"cmOutput",
"outcomeModelReference3.rds"))
outcomeModelReference <- rbind(outcomeModelReference1, outcomeModelReference2, outcomeModelReference3)
outcomeModelReference <- outcomeModelReference[outcomeModelReference$analysisId %in% c(1,3), ]
exposureSummary <- read.csv(file.path(indicationFolder, "pairedExposureSummaryFilteredBySize.csv"))
outcomeModelReference <- merge(outcomeModelReference, exposureSummary[, c("targetId", "comparatorId", "targetName", "comparatorName")])
# For now: only pick one TC:
tcIdx <- which(outcomeModelReference$targetName == "Hydrochlorothiazide" & outcomeModelReference$comparatorName == "Chlorthalidone")
rows <- outcomeModelReference[tcIdx, ]
row <- rows[1, ]
# Create new CohortMethodData object, restricting to people with BP data, and adding BP as splines
cmData <- CohortMethod::loadCohortMethodData(file.path(indicationFolder,
"cmOutput",
row$cohortMethodDataFolder),
skipCovariates = FALSE)
subset <- bps[bps$rowId %in% cmData$cohorts$rowId, ]
# Convert to splines:
newCovars <- data.frame()
newCovarRef <- data.frame()
for (conceptId in unique(subset$conceptId)) {
# conceptId <- 3012888
measurement <- subset[subset$conceptId == conceptId, ]
designMatrix <- splines::bs(measurement$valueAsNumber, 5)
sparse <- lapply(1:5, function(x) data.frame(rowId = measurement$rowId,
covariateId = conceptId * 10 + x,
covariateValue = designMatrix[, x]))
sparse <- do.call("rbind", sparse)
newCovars <- rbind(newCovars, sparse)
ref <- data.frame(covariateId = conceptId * 10 + 1:5,
covariateName = paste(measurement$conceptName[1], 1:5),
analysisId = conceptId,
conceptId = conceptId)
newCovarRef <- rbind(newCovarRef, ref)
}
covariates <- cmData$covariates
covariates <- covariates[ffbase::`%in%`(covariates$rowId, subset$rowId), ]
covariates <- ffbase::ffdfappend(covariates, newCovars)
covariateRef <- cmData$covariateRef
covariateRef <- ffbase::ffdfappend(covariateRef, newCovarRef)
newCmData <- cmData
newCmData$cohorts <- newCmData$cohorts[newCmData$cohorts$rowId %in% subset$rowId, ]
attrition <- attr(newCmData$cohorts, "metaData")$attrition
attrition <- rbind(attrition,
data.frame(description = "Having BP data",
targetPersons = sum(newCmData$cohort$treatment == 1),
comparatorPersons = sum(newCmData$cohort$treatment == 0),
targetExposures = sum(newCmData$cohort$treatment == 1),
comparatorExposures = sum(newCmData$cohort$treatment == 0)))
attr(newCmData$cohorts, "metaData")$attrition <- attrition
newCmData$outcomes <- newCmData$outcomes[newCmData$outcomes$rowId %in% subset$rowId, ]
newCmData$covariates <- covariates
newCmData$covariateRef <- covariateRef
CohortMethod::saveCohortMethodData(cohortMethodData = newCmData,
file = file.path(bpFolder,row$cohortMethodDataFolder),
compress = TRUE)
# Fit propensity model:
cmData <- CohortMethod::loadCohortMethodData(file.path(bpFolder, row$cohortMethodDataFolder))
subgroupCovariateIds <- c(1998, 2998, 3998, 4998, 5998, 6998, 7998, 8998)
ps <- CohortMethod::createPs(cohortMethodData = cmData,
control = Cyclops::createControl(noiseLevel = "quiet",
cvType = "auto",
tolerance = 2e-07,
cvRepetitions = 1,
fold = 10,
startingVariance = 0.01,
seed = 123,
threads = 10),
stopOnError = TRUE,
excludeCovariateIds = subgroupCovariateIds,
maxCohortSizeForFitting = 1e+05)
saveRDS(ps, file.path(bpFolder, row$sharedPsFile))
# Overall balance:
studyPop <- CohortMethod::createStudyPopulation(population = ps,
removeDuplicateSubjects = "keep first",
removeSubjectsWithPriorOutcome = TRUE,
riskWindowStart = 1,
riskWindowEnd = 0,
addExposureDaysToEnd = TRUE,
minDaysAtRisk = 1,
censorAtNewRiskWindow = TRUE)
strataPop <- CohortMethod::stratifyByPs(studyPop, numberOfStrata = 10, baseSelection = "all")
bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
CohortMethod::plotCovariateBalanceScatterPlot(bal, fileName = file.path(bpFolder, "BalanceAfterStrataUsingBp.png"))
CohortMethod::plotCovariateBalanceOfTopVariables(bal, fileName = file.path(bpFolder, "BalanceTopAfterStrataUsingBp.png"))
# matchedPop <- CohortMethod::matchOnPs(studyPop, maxRatio = 100)
#
# bal <- CohortMethod::computeCovariateBalance(matchedPop, cmData)
# CohortMethod::plotCovariateBalanceScatterPlot(bal)
cmData$covariates <- ff::as.ffdf(data.frame(rowId = subset$rowId,
covariateId = subset$conceptId,
covariateValue = subset$valueAsNumber))
subsetRef <- unique(subset[, c("conceptId", "conceptName")])
cmData$covariateRef <- ff::as.ffdf(data.frame(covariateId = subsetRef$conceptId,
covariateName = subsetRef$conceptName))
bal <- CohortMethod::computeCovariateBalance(strataPop, cmData)
# Recompute HR for each outcome:
onTreatment <- rows[rows$analysisId == 1, ]
analysisFolder <- file.path(bpFolder, "Analysis_1")
if (!file.exists(analysisFolder)) {
dir.create(analysisFolder)
}
computeHr <- function(i) {
# i = 1
omFile <- file.path(bpFolder, onTreatment$outcomeModelFile[i])
if (!file.exists(omFile)) {
studyPop <- CohortMethod::createStudyPopulation(population = ps,
cohortMethodData = cmData,
outcomeId = onTreatment$outcomeId[i],
removeDuplicateSubjects = "keep first",
removeSubjectsWithPriorOutcome = TRUE,
riskWindowStart = 1,
riskWindowEnd = 0,
addExposureDaysToEnd = TRUE,
minDaysAtRisk = 1,
censorAtNewRiskWindow = TRUE)
strataPop <- CohortMethod::stratifyByPs(studyPop, numberOfStrata = 10, baseSelection = "all")
outcomeModel <- CohortMethod::fitOutcomeModel(population = strataPop,
stratified = TRUE,
modelType = "cox")
saveRDS(outcomeModel, omFile)
}
}
plyr::l_ply(1:nrow(onTreatment), computeHr, .progress = "text")
originalSummary <- CohortMethod::summarizeAnalyses(onTreatment, file.path(indicationFolder, "cmOutput"))
bpAdjustedSummary <- CohortMethod::summarizeAnalyses(onTreatment, bpFolder)
pathToCsv <- file.path(indicationFolder, "signalInjectionSummary.csv")
siSummary <- read.csv(pathToCsv)
siSummary <- siSummary[siSummary$exposureId == row$targetId, ]
pcs <- data.frame(outcomeId = siSummary$newOutcomeId, targetEffectSize = siSummary$targetEffectSize)
pathToCsv <- system.file("settings", "NegativeControls.csv", package = "Legend")
negativeControls <- read.csv(pathToCsv)
negativeControls <- negativeControls[negativeControls$indicationId == indicationId, ]
ncs <- data.frame(outcomeId = negativeControls$cohortId, targetEffectSize = 1)
controls <- rbind(pcs, ncs)
pathToCsv <- system.file("settings", "OutcomesOfInterest.csv", package = "Legend")
outcomesOfInterest <- read.csv(pathToCsv, stringsAsFactors = FALSE)
outcomesOfInterest <- outcomesOfInterest[outcomesOfInterest$indicationId == indicationId, ]
calibrate <- function(estimates) {
controlEstimates <- merge(estimates, controls)
errorModel <- EmpiricalCalibration::fitSystematicErrorModel(logRr = controlEstimates$logRr,
seLogRr = controlEstimates$seLogRr,
trueLogRr = log(controlEstimates$targetEffectSize))
# EmpiricalCalibration::plotCiCalibrationEffect(logRr = controlEstimates$logRr,
# seLogRr = controlEstimates$seLogRr,
# trueLogRr = log(controlEstimates$targetEffectSize))
calibrated <- EmpiricalCalibration::calibrateConfidenceInterval(logRr = estimates$logRr,
seLogRr = estimates$seLogRr,
model = errorModel)
calibrated$rr <- exp(calibrated$logRr)
calibrated$ci95lb <- exp(calibrated$logLb95Rr)
calibrated$ci95ub <- exp(calibrated$logUb95Rr)
calibrated$logLb95Rr <- NULL
calibrated$logUb95Rr <- NULL
calibrated$outcomeId <- estimates$outcomeId
calibrated$estimate <- "Calibrated"
estimates$estimate <- "Uncalibrated"
estimates <- rbind(estimates[, colnames(calibrated)], calibrated)
}
originalSummary <- calibrate(originalSummary)
bpAdjustedSummary <- calibrate(bpAdjustedSummary)
originalSummary$type <- "Original"
bpAdjustedSummary$type <- "Adjusting for\nblood pressure"
vizData <- rbind(originalSummary, bpAdjustedSummary)
vizData <- merge(vizData, data.frame(outcomeId = outcomesOfInterest$cohortId,
outcomeName = outcomesOfInterest$name))
vizData <- vizData[!is.na(vizData$seLogRr), ]
outcomeNames <- unique(vizData$outcomeName)
outcomeNames <- outcomeNames[order(outcomeNames, decreasing = TRUE)]
vizData$y <- match(vizData$outcomeName, outcomeNames) - 0.1 + 0.2*(vizData$type == "Original")
# flip HR:
temp <- 1/vizData$ci95lb
vizData$ci95lb <- 1/vizData$ci95ub
vizData$ci95ub <- temp
vizData$rr <- 1/vizData$rr
breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 8, 10)
odd <- seq(1, length(outcomeNames), by = 2)
bars <- data.frame(xmin = 0.01,
xmax = 100,
ymin = odd - 0.5,
ymax = odd + 0.5,
type = "Original",
rr = 1,
y = 1)
vizData$estimate <- factor(vizData$estimate, levels = c("Uncalibrated", "Calibrated"))
ggplot2::ggplot(vizData, ggplot2::aes(x = rr, y = y, color = type, shape = type, xmin = ci95lb, xmax = ci95ub)) +
ggplot2::geom_rect(ggplot2::aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = "#EEEEEE", colour = "#EEEEEE", size = 0, data = bars) +
ggplot2::geom_vline(xintercept = breaks, colour = "#AAAAAA", lty = 1, size = 0.25) +
ggplot2::geom_vline(xintercept = 1, size = 0.5) +
ggplot2::geom_errorbarh(alpha = 0.65, size = 0.6, height = 0.3) +
ggplot2::geom_point(alpha = 0.65, size = 2) +
ggplot2::scale_y_continuous(breaks = 1:length(outcomeNames), labels = outcomeNames) +
ggplot2::coord_cartesian(xlim = c(0.1, 10)) +
ggplot2::scale_x_log10(breaks = breaks, labels = breaks) +
ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0, alpha = 0.65), rgb(0, 0, 0.8, alpha = 0.65))) +
ggplot2::xlab("Hazard Ratio") +
ggplot2::facet_grid(~estimate) +
ggplot2::theme(axis.title.y = ggplot2::element_blank(),
legend.title = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank(),
strip.background = ggplot2::element_blank())
ggplot2::ggsave(file.path(bpFolder, "hrs.png"), width = 8, height = 10, dpi = 400)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.