# @file ICTPD.R
#
# Copyright 2022 Observational Health Data Sciences and Informatics
#
# This file is part of IcTemporalPatternDiscovery
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @author Uppsala Monitoring Centre
# @author Tomas Bergvall
#' @keywords internal
"_PACKAGE"
#' @import DatabaseConnector
#' @importFrom stats aggregate printCoefmat qgamma qnorm
#' @importFrom grDevices rgb
NULL
#' @title
#' Get ICTPD counts from database
#'
#' @description
#' This function is used to load the counts needed to compute the ICTPD from a database in OMOP CDM
#' format.
#'
#' @param connectionDetails An R object of type \code{ConnectionDetails} created using
#' the function \code{createConnectionDetails} in the
#' \code{DatabaseConnector} package.
#' @param cdmDatabaseSchema Name of database schema that contains OMOP CDM and
#' vocabulary.
#' @param oracleTempSchema DEPRECATED: use `tempEmulationSchema` instead.
#' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support temp tables. To
#' emulate temp tables, provide a schema with write privileges where temp tables
#' can be created.
#' @param cdmVersion Define the OMOP CDM version used: currently supports "5".
#' @param exposureOutcomePairs A data frame with at least two columns:
#' \itemize{
#' \item {"exposureId" containing the drug_concept_ID or
#' cohort_concept_id of the exposure variable}
#' \item {"outcomeId" containing the condition_concept_ID or
#' cohort_concept_id of the outcome variable}
#' }
#'
#' @param exposureDatabaseSchema The name of the database schema that is the location where
#' the exposure data is available. If exposureTable =
#' DRUG_ERA, exposureSchema is not used by assumed to be
#' cdmSchema. Requires read permissions to this database.
#' @param exposureTable The tablename that contains the exposure cohorts. If
#' exposureTable <> DRUG_ERA, then expectation is
#' exposureTable has format of COHORT table:
#' COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE,
#' COHORT_END_DATE.
#' @param outcomeDatabaseSchema The name of the database schema that is the location where
#' the data used to define the outcome cohorts is available.
#' If exposureTable = CONDITION_ERA, exposureSchema is not
#' used by assumed to be cdmSchema. Requires read permissions
#' to this database.
#' @param outcomeTable The tablename that contains the outcome cohorts. If
#' outcomeTable <> CONDITION_OCCURRENCE, then expectation is
#' outcomeTable has format of COHORT table:
#' COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE,
#' COHORT_END_DATE.
#' @param riskPeriodStart start of the risk period - can be set between 0 and 99999,
#' default is 1.
#' @param riskPeriodEnd end of the risk period - can be set between 0 and 99999,
#' default is 30.
#' @param controlPeriodStart start of the control period - can be set between -99999 and
#' 0, default is -1080.
#' @param controlPeriodEnd end of the control period - can be set between -99999 and
#' 0, default is -361.
#' @param censor a flag indicating wether the method should censor the
#' observation period at the end of exposure or not. Available
#' input is 0 or 1 with default = 0.
#'
#' @return
#' An object of type \code{ictpdData} containing counts that can be used in the
#' \code{\link{calculateStatisticsIc}} function.
#'
#' @template Example
#'
#' @export
getDbIctpdData <- function(connectionDetails,
cdmDatabaseSchema,
oracleTempSchema = NULL,
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
cdmVersion = "5",
exposureOutcomePairs,
exposureDatabaseSchema = cdmDatabaseSchema,
exposureTable = "drug_era",
outcomeDatabaseSchema = cdmDatabaseSchema,
outcomeTable = "condition_era",
controlPeriodStart = -1080,
controlPeriodEnd = -361,
riskPeriodStart = 1,
riskPeriodEnd = 30,
censor = FALSE) {
if (!is.null(oracleTempSchema) && oracleTempSchema != "") {
warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.")
tempEmulationSchema <- oracleTempSchema
}
start <- Sys.time()
exposureTable <- tolower(exposureTable)
outcomeTable <- tolower(outcomeTable)
if (exposureTable == "drug_era") {
exposureStartDate <- "drug_era_start_date"
exposureEndDate <- "drug_era_end_date"
exposureConceptId <- "drug_concept_id"
exposurePersonId <- "person_id"
} else if (exposureTable == "drug_exposure") {
exposureStartDate <- "drug_exposure_start_date"
exposureEndDate <- "drug_exposure_end_date"
exposureConceptId <- "drug_concept_id"
exposurePersonId <- "person_id"
} else {
exposureStartDate <- "cohort_start_date"
exposureEndDate <- "cohort_end_date"
exposureConceptId <- "cohort_definition_id"
exposurePersonId <- "subject_id"
}
if (outcomeTable == "condition_era") {
outcomeStartDate <- "condition_era_start_date"
outcomeEndDate <- "condition_era_end_date"
outcomeConceptId <- "condition_concept_id"
outcomePersonId <- "person_id"
} else if (outcomeTable == "condition_occurrence") {
outcomeStartDate <- "condition_start_date"
outcomeEndDate <- "condition_end_date"
outcomeConceptId <- "condition_concept_id"
outcomePersonId <- "person_id"
} else {
outcomeStartDate <- "cohort_start_date"
outcomeEndDate <- "cohort_end_date"
outcomeConceptId <- "cohort_definition_id"
outcomePersonId <- "subject_id"
}
# Check if connection already open:
if (!"conn" %in% names(connectionDetails)) {
conn <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(conn))
} else {
conn <- connectionDetails$conn
}
exposures <- data.frame(type = 1, id = unique(exposureOutcomePairs$exposureId))
outcomes <- data.frame(type = 2, id = unique(exposureOutcomePairs$outcomeId))
conceptsOfInterest <- rbind(exposures, outcomes)
ParallelLogger::logTrace("Inserting tables of IDs")
DatabaseConnector::insertTable(
connection = conn,
tableName = "#concepts_of_interest",
data = conceptsOfInterest,
tempTable = TRUE,
dropTableIfExists = TRUE
)
exposureOutcome <- data.frame(
exposure_concept_id = exposureOutcomePairs$exposureId,
outcome_concept_id = exposureOutcomePairs$outcomeId
)
DatabaseConnector::insertTable(
connection = conn,
tableName = "#exposure_outcome",
data = exposureOutcome,
tempTable = TRUE,
dropTableIfExists = TRUE
)
sql <- c()
renderedSql <- SqlRender::loadRenderTranslateSql(
sqlFilename = "IctpdParameterizedSQL.sql",
packageName = "IcTemporalPatternDiscovery",
dbms = connectionDetails$dbms,
tempEmulationSchema = tempEmulationSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
riskPeriodStart = riskPeriodStart,
riskPeriodEnd = riskPeriodEnd,
controlPeriodStart = controlPeriodStart,
controlPeriodEnd = controlPeriodEnd,
exposureDatabaseSchema = exposureDatabaseSchema,
exposureTable = exposureTable,
exposureStartDate = exposureStartDate,
exposureEndDate = exposureEndDate,
exposureConceptId = exposureConceptId,
exposurePersonId = exposurePersonId,
outcomeDatabaseSchema = outcomeDatabaseSchema,
outcomeTable = outcomeTable,
outcomeStartDate = outcomeStartDate,
outcomeEndDate = outcomeEndDate,
outcomeConceptId = outcomeConceptId,
outcomePersonId = outcomePersonId,
censor = censor
)
ParallelLogger::logInfo("Computing counts. This could take a while")
DatabaseConnector::executeSql(conn, renderedSql)
sql <- c(sql, renderedSql)
renderedSql <- SqlRender::loadRenderTranslateSql(
sqlFilename = "GetStatisticsData.sql",
packageName = "IcTemporalPatternDiscovery",
dbms = connectionDetails$dbms,
tempEmulationSchema = tempEmulationSchema,
exposureConceptId = exposureConceptId,
outcomeConceptId = outcomeConceptId
)
ParallelLogger::logInfo("Retrieving counts from server")
counts <- DatabaseConnector::querySql(conn, renderedSql)
names(counts) <- toupper(names(counts))
sql <- c(sql, renderedSql)
# Drop tables used in computation:
renderedSql <- SqlRender::loadRenderTranslateSql(
sqlFilename = "DropIctpdTables.sql",
packageName = "IcTemporalPatternDiscovery",
dbms = connectionDetails$dbms
)
DatabaseConnector::executeSql(conn, renderedSql, progressBar = FALSE, reportOverallTime = FALSE)
sql <- c(sql, renderedSql)
metaData <- list(exposureOutcomePairs = exposureOutcomePairs)
result <- list(counts = counts, metaData = metaData)
class(result) <- "ictpdData"
delta <- Sys.time() - start
ParallelLogger::logInfo(paste("Loading took", signif(delta, 3), attr(delta, "units")))
return(result)
}
ic <- function(obs, exp, shape.add = 0.5, rate.add = 0.5, percentile = 0.025) {
ic <- log2((obs + shape.add) / (exp + rate.add))
ic_low <- log2(qgamma(p = percentile, shape = (obs + shape.add), rate = (exp + rate.add)))
ic_high <- log2(qgamma(p = (1 - percentile), shape = (obs + shape.add), rate = (exp + rate.add)))
return(list(ic = ic, ic_low = ic_low, ic_high = ic_high))
}
#' @title
#' compute the IC statistics
#'
#' @param ictpdData An object containing the counts, as created using the
#' \code{\link{getDbIctpdData}} function.
#' @param shrinkage Shrinkage used in IRR calculations, required >0 to deal with 0 case
#' counts, but larger number means more shrinkage. default is 0.5
#' @param icPercentile The lower bound of the credibility interval for the IC values
#' (IClow). default is 0.025,
#' @param metric Defines wether the output will contain the point estimate or the
#' lower bound. Available input is 'IC and 'IC025' default is 'IC025'
#' @param multipleControlPeriods Defines the control periods to use where 100 means the control
#' period defined by controlPeriodStart/End, 010 means the period -30
#' to -1 day before prescription and 001 means the control period on
#' the day of prescription
#' @param multipleRiskPeriods Defines the risk periods to use 10000 is 1-30 days, 01000 is 1 to
#' 360 days, 00100 is 31 to 90 days, 00010 is 91 to 180 and 00001 is
#' 721 to 1080 days after prescription default is '10000'
#' @description
#' Computes the IC statistics.
#'
#' @return
#' An object of type \code{ictpdResults} containing the results.
#'
#' @template Example
#'
#' @export
calculateStatisticsIc <- function(ictpdData,
multipleControlPeriods = "110",
multipleRiskPeriods = "10000",
shrinkage = 0.5,
icPercentile = 0.025,
metric = "IC025") {
if (toupper(metric) == "IC025" & icPercentile != 0.025) {
icPercentile <- 0.025
warning("Forcing icPercentile to 0.025 to be able to compute IC025")
}
comb <- ictpdData$counts
if (nrow(comb) == 0) {
warning("No data found")
result <- list(results = data.frame(), metaData = ictpdData$metaData, metric = metric)
class(result) <- "ictpdResults"
return(result)
}
expectedControl <- c()
controlPeriodItem <- strsplit(toString(multipleControlPeriods[[1]]), "")
tmpMat <- matrix(rep(NA, length(comb[["CXY_CONTROL"]])), ncol = 1)
if (controlPeriodItem[[1]][1] == 1) {
tmpMat <- cbind(
tmpMat,
comb[["CXY_CONTROL"]] / (comb[["CX_CONTROL"]] * (comb[["CY_CONTROL"]] / comb[["C_CONTROL"]]))
)
}
if (controlPeriodItem[[1]][2] == 1) {
tmpMat <- cbind(tmpMat, comb[["CXY_1M"]] / (comb[["CX_1M"]] * (comb[["CY_1M"]] / comb[["C_1M"]])))
}
if (controlPeriodItem[[1]][3] == 1) {
tmpMat <- cbind(tmpMat, comb[["CXY_0M"]] / (comb[["CX_0M"]] * (comb[["CY_0M"]] / comb[["C_0M"]])))
}
if (ncol(tmpMat) == 1) {
tmpMat <- matrix(rep(1, length(comb[["CXY_CONTROL"]])), ncol = 1)
}
tmpMat[is.infinite(tmpMat)] <- NA
tmpMat[is.nan(tmpMat)] <- NA
expectedControl <- apply(tmpMat, 1, function(x) max(x[!is.na(x)]))
# Max of a row with only NA equals -Inf, therefore changed back to NA
expectedControl[expectedControl == -Inf] <- NA
# ---------------------------------- -- Calculate IC for all -- -- observation windows and -- --
# select the largest IC025 -- ----------------------------------
obsPeriodItem <- strsplit(toString(multipleRiskPeriods[[1]]), "")
tmpMat <- matrix(rep(-99999999, length(comb[["CXY_CONTROL"]])), ncol = 1)
comb["IC"] <- c()
comb["IC_low"] <- c()
comb["IC_high"] <- c()
comb["CXY"] <- c()
comb["CX"] <- c()
comb["CY"] <- c()
comb["C"] <- c()
comb["expected"] <- c()
for (cc in 1:length(comb[["CXY_CONTROL"]])) {
maxIC_low <- -999999999
maxIC <- NA
maxIC_high <- NA
CXY <- NA
CX <- NA
CY <- NA
C <- NA
expected <- NA
if (obsPeriodItem[[1]][1] == 1) {
tmpIC <- ic(
comb[["CXY_OBSERVED_1_30"]][cc] # Observed
, (comb[["CX_OBSERVED_1_30"]][cc] *
(comb[["CY_OBSERVED_1_30"]][cc] / comb[["C_OBSERVED_1_30"]][cc])) # Expected_observed
*
expectedControl[cc] # Expected_control
, as.numeric(shrinkage), as.numeric(shrinkage) # Shrinkage factors
, as.numeric(icPercentile)
)
if (!is.na(tmpIC$ic_low)) {
if (maxIC_low < tmpIC$ic_low) {
maxIC <- tmpIC$ic
maxIC_low <- tmpIC$ic_low
maxIC_high <- tmpIC$ic_high
CXY <- comb[["CXY_OBSERVED_1_30"]][cc]
CX <- comb[["CX_OBSERVED_1_30"]][cc]
CY <- comb[["CY_OBSERVED_1_30"]][cc]
C <- comb[["C_OBSERVED_1_30"]][cc]
expected <- (comb[["CX_OBSERVED_1_30"]][cc] * (comb[["CY_OBSERVED_1_30"]][cc] / comb[["C_OBSERVED_1_30"]][cc])) *
expectedControl[cc]
}
}
}
if (obsPeriodItem[[1]][2] == 1) {
tmpIC <- ic(
comb[["CXY_OBSERVED_1_360"]][cc] # Observed
, (comb[["CX_OBSERVED_1_360"]][cc] *
(comb[["CY_OBSERVED_1_360"]][cc] / comb[["C_OBSERVED_1_360"]][cc])) # Expected_observed
* expectedControl[cc] # Expected_control
, as.numeric(shrinkage), as.numeric(shrinkage) # Shrinkage factors
, as.numeric(icPercentile)
)
if (!is.na(tmpIC$ic_low)) {
if (maxIC_low < tmpIC$ic_low) {
maxIC <- tmpIC$ic
maxIC_low <- tmpIC$ic_low
maxIC_high <- tmpIC$ic_high
CXY <- comb[["CXY_OBSERVED_1_360"]][cc]
CX <- comb[["CX_OBSERVED_1_360"]][cc]
CY <- comb[["CY_OBSERVED_1_360"]][cc]
C <- comb[["C_OBSERVED_1_360"]][cc]
expected <- (comb[["CX_OBSERVED_1_360"]][cc] * (comb[["CY_OBSERVED_1_360"]][cc] / comb[["C_OBSERVED_1_360"]][cc])) *
expectedControl[cc]
}
}
}
if (obsPeriodItem[[1]][3] == 1) {
tmpIC <- ic(
comb[["CXY_OBSERVED_31_90"]][cc] # Observed
, (comb[["CX_OBSERVED_31_90"]][cc] *
(comb[["CY_OBSERVED_31_90"]][cc] / comb[["C_OBSERVED_31_90"]][cc])) # Expected_observed
* expectedControl[cc] # Expected_control
, as.numeric(shrinkage), as.numeric(shrinkage) # Shrinkage factors
, as.numeric(icPercentile)
)
if (!is.na(tmpIC$ic_low)) {
if (maxIC_low < tmpIC$ic_low) {
maxIC <- tmpIC$ic
maxIC_low <- tmpIC$ic_low
maxIC_high <- tmpIC$ic_high
CXY <- comb[["CXY_OBSERVED_31_90"]][cc]
CX <- comb[["CX_OBSERVED_31_90"]][cc]
CY <- comb[["CY_OBSERVED_31_90"]][cc]
C <- comb[["C_OBSERVED_31_90"]][cc]
expected <- (comb[["CX_OBSERVED_31_90"]][cc] * (comb[["CY_OBSERVED_31_90"]][cc] / comb[["C_OBSERVED_31_90"]][cc])) *
expectedControl[cc]
}
}
}
if (obsPeriodItem[[1]][4] == 1) {
tmpIC <- ic(
comb[["CXY_OBSERVED_91_180"]][cc] # Observed
, (comb[["CX_OBSERVED_91_180"]][cc] *
(comb[["CY_OBSERVED_91_180"]][cc] / comb[["C_OBSERVED_91_180"]][cc])) # Expected_observed
* expectedControl[cc] # Expected_control
, as.numeric(shrinkage), as.numeric(shrinkage) # Shrinkage factors
, as.numeric(icPercentile)
)
if (!is.na(tmpIC$ic_low)) {
if (maxIC_low < tmpIC$ic_low) {
maxIC <- tmpIC$ic
maxIC_low <- tmpIC$ic_low
maxIC_high <- tmpIC$ic_high
CXY <- comb[["CXY_OBSERVED_91_180"]][cc]
CX <- comb[["CX_OBSERVED_91_180"]][cc]
CY <- comb[["CY_OBSERVED_91_180"]][cc]
C <- comb[["C_OBSERVED_91_180"]][cc]
expected <- (comb[["CX_OBSERVED_91_180"]][cc] * (comb[["CY_OBSERVED_91_180"]][cc] / comb[["C_OBSERVED_91_180"]][cc])) *
expectedControl[cc]
}
}
}
if (obsPeriodItem[[1]][5] == 1) {
tmpIC <- ic(
comb[["CXY_OBSERVED_721_1080"]][cc] # Observed
, (comb[["CX_OBSERVED_721_1080"]][cc] *
(comb[["CY_OBSERVED_721_1080"]][cc] / comb[["C_OBSERVED_721_1080"]][cc])) # Expected_observed
* expectedControl[cc] # Expected_control
, as.numeric(shrinkage), as.numeric(shrinkage) # Shrinkage factors
, as.numeric(icPercentile)
)
if (!is.na(tmpIC$ic_low)) {
if (maxIC_low < tmpIC$ic_low) {
maxIC <- tmpIC$ic
maxIC_low <- tmpIC$ic_low
maxIC_high <- tmpIC$ic_high
CXY <- comb[["CXY_OBSERVED_721_1080"]][cc]
CX <- comb[["CX_OBSERVED_721_1080"]][cc]
CY <- comb[["CY_OBSERVED_721_1080"]][cc]
C <- comb[["C_OBSERVED_721_1080"]][cc]
expected <- (comb[["CX_OBSERVED_721_1080"]][cc] * (comb[["CY_OBSERVED_721_1080"]][cc] / comb[["C_OBSERVED_721_1080"]][cc])) *
expectedControl[cc]
}
}
}
# ----------------------------- -- Store max values -- -----------------------------
if (is.na(maxIC)) {
comb[["IC"]][cc] <- NA
comb[["IC_low"]][cc] <- NA
comb[["IC_high"]][cc] <- NA
comb[["CXY"]][cc] <- NA
comb[["CX"]][cc] <- NA
comb[["CY"]][cc] <- NA
comb[["C"]][cc] <- NA
comb[["expected"]][cc] <- NA
} else {
comb[["IC"]][cc] <- maxIC
comb[["IC_low"]][cc] <- maxIC_low
comb[["IC_high"]][cc] <- maxIC_high
comb[["CXY"]][cc] <- CXY
comb[["CX"]][cc] <- CX
comb[["CY"]][cc] <- CY
comb[["C"]][cc] <- C
comb[["expected"]][cc] <- expected
}
}
# Add some standard metrics that are computed by all OHDSI methods (not very meaningful for ICTPD):
comb$LOG_RR <- comb$IC
comb$SE_LOG_RR <- (comb$IC_high - comb$IC) / qnorm(1 - icPercentile)
if (toupper(metric) == "IC") {
comb$estimate <- comb$IC
} else {
comb$estimate <- comb$IC_low
}
colnames(comb) <- SqlRender::snakeCaseToCamelCase(colnames(comb))
ictpdData$metaData$call <- list(ictpdData$metaData$call, match.call())
result <- list(results = comb, metaData = ictpdData$metaData, metric = metric)
class(result) <- "ictpdResults"
return(result)
}
#' @export
print.ictpdResults <- function(x, ...) {
output <- with(x, subset(x$results, select = c(exposureofinterest, outcomeofinterest, estimate)))
colnames(output) <- c("Exposure concept ID", "Outcome concept ID", x$metric)
printCoefmat(output)
}
#' @export
summary.ictpdResults <- function(object, ...) {
object$results
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.