R/RunAnalyses.R

Defines functions getDiagnosticsSummary getResultsSummary getFileReference calibrateGroup calibrateEstimates summarizeResults .passBooleanToString .selectByType .createSccsModelFileName .createSccsIntervalDataFileName .createStudyPopulationFileName .f .createSccsDataFileName createSccsModelObject createSccsIntervalDataObject createStudyPopObject createSccsDataObject getSccsData which.list createReferenceTable runSccsAnalyses createDefaultSccsMultiThreadingSettings createSccsMultiThreadingSettings

Documented in createDefaultSccsMultiThreadingSettings createSccsMultiThreadingSettings getDiagnosticsSummary getFileReference getResultsSummary runSccsAnalyses

# Copyright 2025 Observational Health Data Sciences and Informatics
#
# This file is part of SelfControlledCaseSeries
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Create SelfControlledCaseSeries multi-threading settings
#'
#' @param getDbSccsDataThreads             The number of parallel threads to use for building the
#'                                         `SccsData` objects.
#' @param createStudyPopulationThreads     The number of parallel threads to use for building the
#'                                         `studyPopulation` objects.
#' @param createIntervalDataThreads        The number of parallel threads to use for building the
#'                                         `SccsIntervalData` objects.
#' @param fitSccsModelThreads              The number of parallel threads to use for fitting the
#'                                         models.
#' @param cvThreads                        The number of parallel threads to use for the cross-
#'                                         validation when estimating the hyperparameter for the
#'                                         outcome model. Note that the total number of CV threads at
#'                                         one time could be `fitSccsModelThreads * cvThreads`.
#' @param calibrationThreads               The number of parallel threads to use for empirical calibration.
#'
#' @return
#' An object of type `SccsMultiThreadingSettings`.
#'
#' @seealso [createDefaultSccsMultiThreadingSettings()]
#'
#' @export
createSccsMultiThreadingSettings <- function(getDbSccsDataThreads = 1,
                                             createStudyPopulationThreads = 1,
                                             createIntervalDataThreads = 1,
                                             fitSccsModelThreads = 1,
                                             cvThreads = 1,
                                             calibrationThreads = 1) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertInt(getDbSccsDataThreads, lower = 1, add = errorMessages)
  checkmate::assertInt(createStudyPopulationThreads, lower = 1, add = errorMessages)
  checkmate::assertInt(createIntervalDataThreads, lower = 1, add = errorMessages)
  checkmate::assertInt(fitSccsModelThreads, lower = 1, add = errorMessages)
  checkmate::assertInt(cvThreads, lower = 1, add = errorMessages)
  checkmate::assertInt(calibrationThreads, lower = 1, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)
  settings <- list()
  for (name in names(formals(createSccsMultiThreadingSettings))) {
    settings[[name]] <- get(name)
  }
  class(settings) <- "SccsMultiThreadingSettings"
  return(settings)
}

#' Create default SelfControlledCaseSeries multi-threading settings
#'
#' @description
#' Create SelfControlledCaseSeries multi-threading settings based on the maximum
#' number of cores to be used.
#'
#' @param maxCores  Maximum number of CPU cores to use.
#'
#' @return
#' An object of type `SccsMultiThreadingSettings`.
#'
#' @seealso [createSccsMultiThreadingSettings()]
#'
#' @examples
#' settings <- createDefaultSccsMultiThreadingSettings(10)
#'
#' @export
createDefaultSccsMultiThreadingSettings <- function(maxCores) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertInt(maxCores, lower = 1, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)
  settings <- createSccsMultiThreadingSettings(
    getDbSccsDataThreads = min(3, maxCores),
    createStudyPopulationThreads = min(3, maxCores),
    createIntervalDataThreads = min(3, maxCores),
    fitSccsModelThreads = min(5, max(1, floor(maxCores / 5))),
    cvThreads = min(5, maxCores),
    calibrationThreads = min(4, maxCores)
  )
  return(settings)
}

#' Run a list of analyses
#'
#' @details
#' Run a list of analyses for the exposures-outcomes of interest. This function will run all
#' specified analyses against all hypotheses of interest, meaning that the total number of outcome
#' models is `length(sccsAnalysisList) * length(exposuresOutcomeList)` When you provide several analyses
#' it will determine whether any of the analyses have anything in common, and will take advantage of
#' this fact.
#'
#' ## Analyses to Exclude
#'
#' Normally, `runSccsAnalyses` will run all combinations of exposures-outcome-analyses settings.
#' However, sometimes we may not need all those combinations. Using the `analysesToExclude` argument,
#' we can remove certain items from the full matrix. This argument should be a data frame with at least
#' one of the following columns:
#'
#' - exposureId
#' - outcomeId
#' - nestingCohortId
#' - analysisId
#'
#' This data frame will be joined to the outcome model reference table before executing, and matching rows
#' will be removed. For example, if one specifies only one exposure ID and analysis ID, then any analyses with
#' that exposure and that analysis ID will be skipped.
#'
#' @param connectionDetails                An R object of type `ConnectionDetails` created using
#'                                         the function [DatabaseConnector::createConnectionDetails()].
#' @param cdmDatabaseSchema                The name of the database schema that contains the OMOP CDM
#'                                         instance.  Requires read permissions to this database. On
#'                                         SQL Server, this should specify both the database and the
#'                                         schema, so for example 'cdm_instance.dbo'.
#' @param tempEmulationSchema              Some database platforms like Oracle and Impala do not truly support
#'                                         temp tables. To emulate temp tables, provide a schema with write
#'                                         privileges where temp tables can be created.
#' @param outcomeDatabaseSchema            The name of the database schema that is the location where
#'                                         the data used to define the outcome cohorts is available. Requires
#'                                         read permissions to this database.
#' @param outcomeTable                     The table name that contains the outcome cohorts.
#' @param exposureDatabaseSchema           The name of the database schema that is the location where
#'                                         the exposure data used to define the exposure cohorts is
#'                                         available. If `exposureTable = "DRUG_ERA"`,
#'                                         `exposureDatabaseSchema` is not used but assumed to be
#'                                         `cdmDatabaseSchema`.  Requires read permissions to this database.
#' @param exposureTable                    The table name that contains the exposure cohorts.  If
#'                                         `exposureTable <> "DRUG_ERA"`, then expectation is `exposureTable`
#'                                         has format of COHORT table: cohort_concept_id, SUBJECT_ID,
#'                                         COHORT_START_DATE, COHORT_END_DATE.
#' @param customCovariateDatabaseSchema    The name of the database schema that is the location where
#'                                         the custom covariate data is available.
#' @param customCovariateTable             Name of the table holding the custom covariates. This table
#'                                         should have the same structure as the cohort table.
#' @param nestingCohortDatabaseSchema      The name of the database schema that is the location
#'                                         where the nesting cohort is defined.
#' @param nestingCohortTable               Name of the table holding the nesting cohort. This table
#'                                         should have the same structure as the cohort table.
#' @param outputFolder                     Name of the folder where all the outputs will written to.
#' @param sccsMultiThreadingSettings       An object of type `SccsMultiThreadingSettings` as created using
#'                                         the [createSccsMultiThreadingSettings()] or
#'                                         [createDefaultSccsMultiThreadingSettings()] functions.
#' @param sccsAnalysesSpecifications       An object of type `SccsAnalysesSpecifications` as created using
#'                                         the [`createSccsAnalysesSpecifications()`] function
#'
#' @return
#' A tibble describing for each exposure-outcome-analysisId combination where the intermediary and
#' outcome model files can be found, relative to the `outputFolder`.
#'
#' @export
runSccsAnalyses <- function(connectionDetails,
                            cdmDatabaseSchema,
                            tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
                            exposureDatabaseSchema = cdmDatabaseSchema,
                            exposureTable = "drug_era",
                            outcomeDatabaseSchema = cdmDatabaseSchema,
                            outcomeTable = "cohort",
                            customCovariateDatabaseSchema = cdmDatabaseSchema,
                            customCovariateTable = "cohort",
                            nestingCohortDatabaseSchema = cdmDatabaseSchema,
                            nestingCohortTable = "cohort",
                            outputFolder,
                            sccsMultiThreadingSettings = createSccsMultiThreadingSettings(),
                            sccsAnalysesSpecifications) {
  errorMessages <- checkmate::makeAssertCollection()
  if (is(connectionDetails, "connectionDetails")) {
    checkmate::assertClass(connectionDetails, "connectionDetails", add = errorMessages)
  } else {
    checkmate::assertClass(connectionDetails, "ConnectionDetails", add = errorMessages)
  }
  checkmate::assertCharacter(cdmDatabaseSchema, len = 1, add = errorMessages)
  checkmate::assertCharacter(tempEmulationSchema, len = 1, null.ok = TRUE, add = errorMessages)
  checkmate::assertCharacter(exposureDatabaseSchema, len = 1, add = errorMessages)
  checkmate::assertCharacter(exposureTable, len = 1, add = errorMessages)
  checkmate::assertCharacter(outcomeDatabaseSchema, len = 1, add = errorMessages)
  checkmate::assertCharacter(outcomeTable, len = 1, add = errorMessages)
  checkmate::assertCharacter(customCovariateDatabaseSchema, len = 1, add = errorMessages)
  checkmate::assertCharacter(customCovariateTable, len = 1, add = errorMessages)
  checkmate::assertCharacter(nestingCohortDatabaseSchema, len = 1, add = errorMessages)
  checkmate::assertCharacter(nestingCohortTable, len = 1, add = errorMessages)
  checkmate::assertCharacter(outputFolder, len = 1, add = errorMessages)
  checkmate::assertClass(sccsMultiThreadingSettings, "SccsMultiThreadingSettings", add = errorMessages)
  checkmate::assertR6(sccsAnalysesSpecifications, "SccsAnalysesSpecifications", add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)

  uniqueExposuresOutcomeList <- unique(lapply(sccsAnalysesSpecifications$exposuresOutcomeList, function(x) x$toJson()))
  if (length(uniqueExposuresOutcomeList) != length(sccsAnalysesSpecifications$exposuresOutcomeList)) {
    stop("Duplicate exposure-outcomes pairs are not allowed")
  }
  uniqueAnalysisIds <- unlist(unique(ParallelLogger::selectFromList(sccsAnalysesSpecifications$sccsAnalysisList, "analysisId")))
  if (length(uniqueAnalysisIds) != length(sccsAnalysesSpecifications$sccsAnalysisList)) {
    stop("Duplicate analysis IDs are not allowed")
  }

  if (!file.exists(outputFolder)) {
    dir.create(outputFolder)
  }

  referenceTable <- createReferenceTable(
    sccsAnalysesSpecifications$sccsAnalysisList,
    sccsAnalysesSpecifications$exposuresOutcomeList,
    outputFolder,
    sccsAnalysesSpecifications$combineDataFetchAcrossOutcomes,
    sccsAnalysesSpecifications$analysesToExclude
  )

  loadConceptsPerLoad <- attr(referenceTable, "loadConceptsPerLoad")

  # Create arguments for sccsData objects ----------------------------
  sccsDataObjectsToCreate <- list()
  for (sccsDataFileName in unique(referenceTable$sccsDataFile)) {
    if (!file.exists(file.path(outputFolder, sccsDataFileName))) {
      referenceRow <- referenceTable |>
        filter(.data$sccsDataFile == sccsDataFileName) |>
        head(1)

      loadConcepts <- loadConceptsPerLoad[[referenceRow$loadId]]
      if (length(loadConcepts$exposureIds) == 1 && loadConcepts$exposureIds[1] == "all") {
        loadConcepts$exposureIds <- c()
      }
      useCustomCovariates <- (length(loadConcepts$customCovariateIds) > 0)
      if (length(loadConcepts$customCovariateIds) == 1 && loadConcepts$customCovariateIds[1] == "all") {
        loadConcepts$customCovariateIds <- c()
      }
      args <- list(
        connectionDetails = connectionDetails,
        cdmDatabaseSchema = cdmDatabaseSchema,
        tempEmulationSchema = tempEmulationSchema,
        exposureDatabaseSchema = exposureDatabaseSchema,
        exposureTable = exposureTable,
        outcomeDatabaseSchema = outcomeDatabaseSchema,
        outcomeTable = outcomeTable,
        customCovariateDatabaseSchema = customCovariateDatabaseSchema,
        customCovariateTable = customCovariateTable,
        nestingCohortDatabaseSchema = nestingCohortDatabaseSchema,
        nestingCohortTable = nestingCohortTable,
        outcomeIds = loadConcepts$outcomeIds,
        getDbSccsDataArgs = createGetDbSccsDataArgs(
          exposureIds = loadConcepts$exposureIds,
          customCovariateIds = loadConcepts$customCovariateIds,
          nestingCohortId = loadConcepts$nestingCohortId,
          deleteCovariatesSmallCount = loadConcepts$deleteCovariatesSmallCount,
          studyStartDates = loadConcepts$studyStartDates,
          studyEndDates = loadConcepts$studyEndDates,
          maxCasesPerOutcome = loadConcepts$maxCasesPerOutcome
        )
      )
      sccsDataObjectsToCreate[[length(sccsDataObjectsToCreate) + 1]] <- list(
        args = args,
        sccsDataFileName = file.path(outputFolder, sccsDataFileName)
      )
    }
  }

  # Create arguments for study population objects ---------------------------------------------
  uniqueStudyPopFiles <- unique(referenceTable$studyPopFile)
  uniqueStudyPopFiles <- uniqueStudyPopFiles[!file.exists(file.path(outputFolder, uniqueStudyPopFiles))]
  studyPopFilesToCreate <- list()
  for (studyPopFile in uniqueStudyPopFiles) {
    refRow <- referenceTable[referenceTable$studyPopFile == studyPopFile, ][1, ]
    analysisRow <- Filter(function(x) x$analysisId == refRow$analysisId, sccsAnalysesSpecifications$sccsAnalysisList)[[1]]
    args <- list(createStudyPopulationArgs = analysisRow$createStudyPopulationArgs)
    args$outcomeId <- refRow$outcomeId
    if (is.character(args$createStudyPopulationArgs$restrictTimeToEraId)) {
      args$createStudyPopulationArgs$restrictTimeToEraId <- pull(refRow[, args$createStudyPopulationArgs$restrictTimeToEraId])
    }
    studyPopFilesToCreate[[length(studyPopFilesToCreate) + 1]] <- list(
      args = args,
      sccsDataFile = file.path(outputFolder, refRow$sccsDataFile),
      studyPopFile = file.path(outputFolder, refRow$studyPopFile)
    )
  }

  # Create arguments for interval data objects ---------------------------------------------------
  sccsIntervalDataFiles <- referenceTable$sccsIntervalDataFile
  sccsIntervalDataFiles <- sccsIntervalDataFiles[!file.exists(file.path(outputFolder, sccsIntervalDataFiles))]
  sccsIntervalDataObjectsToCreate <- list()
  for (sccsIntervalDataFile in sccsIntervalDataFiles) {
    refRow <- referenceTable[referenceTable$sccsIntervalDataFile == sccsIntervalDataFile, ][1, ]
    analysisRow <- Filter(function(x) x$analysisId == refRow$analysisId, sccsAnalysesSpecifications$sccsAnalysisList)[[1]]
    sccs <- is(analysisRow$createIntervalDataArgs, "CreateSccsIntervalDataArgs")
    covariateSettings <- analysisRow$createIntervalDataArgs$eraCovariateSettings
    if (is(covariateSettings, "EraCovariateSettings")) {
      covariateSettings <- list(covariateSettings)
    }
    if (!sccs) {
      covariateSettings[[length(covariateSettings) + 1]] <- analysisRow$createIntervalDataArgs$controlIntervalSettings
    }
    instantiatedSettings <- list()
    for (settings in covariateSettings) {
      settings <- settings$clone()
      includeEraIds <- c()
      if (length(settings$includeEraIds) != 0) {
        for (includeEraId in settings$includeEraIds) {
          if (is.character(includeEraId)) {
            if (is.null(refRow[[includeEraId]])) {
              stop(paste0("The 'includeEraIds' argument was set to '", includeEraId, "' when calling createEraCovariateSettings(), but this exposure label is not found in exposures-outcome sets"))
            }
            includeEraIds <- c(includeEraIds, refRow[[includeEraId]])
          } else {
            includeEraIds <- c(includeEraIds, includeEraId)
          }
        }
      }
      excludeEraIds <- c()
      if (length(settings$excludeEraIds) != 0) {
        for (excludeEraId in settings$excludeEraIds) {
          if (is.character(excludeEraId)) {
            if (is.null(refRow[[excludeEraId]])) {
              stop(paste0("The 'excludeEraIds' argument was set to '", excludeEraId, "' when calling createEraCovariateSettings(), but this exposure label is not found in exposures-outcome sets"))
            }
            excludeEraIds <- c(excludeEraIds, refRow[[excludeEraId]])
          } else {
            excludeEraIds <- c(excludeEraIds, excludeEraId)
          }
        }
      }
      settings$includeEraIds <- includeEraIds
      settings$excludeEraIds <- excludeEraIds
      instantiatedSettings[[length(instantiatedSettings) + 1]] <- settings
    }
    if (sccs) {
      args <- list(createSccsIntervalDataArgs = analysisRow$createIntervalDataArgs$clone())
      args$createSccsIntervalDataArgs$eraCovariateSettings <- instantiatedSettings
    } else {
      args <- list(createScriIntervalDataArgs = analysisRow$createIntervalDataArgs$clone())
      args$createScriIntervalDataArgs$controlIntervalSettings <- instantiatedSettings[[length(instantiatedSettings)]]
      args$createScriIntervalDataArgs$eraCovariateSettings <- instantiatedSettings[1:(length(instantiatedSettings) - 1)]
    }
    sccsDataFileName <- refRow$sccsDataFile
    studyPopFile <- refRow$studyPopFile
    sccsIntervalDataObjectsToCreate[[length(sccsIntervalDataObjectsToCreate) + 1]] <- list(
      args = args,
      sccs = sccs,
      sccsDataFileName = file.path(outputFolder, sccsDataFileName),
      studyPopFile = file.path(outputFolder, studyPopFile),
      sccsIntervalDataFileName = file.path(outputFolder, sccsIntervalDataFile)
    )
  }

  # Create arguments for model objects ---------------------------------------------
  sccsModelFiles <- referenceTable$sccsModelFile
  sccsModelFiles <- sccsModelFiles[!file.exists(file.path(outputFolder, sccsModelFiles))]
  sccsModelObjectsToCreate <- list()
  for (sccsModelFile in sccsModelFiles) {
    refRow <- referenceTable[referenceTable$sccsModelFile == sccsModelFile, ][1, ]
    analysisRow <- Filter(function(x) x$analysisId == refRow$analysisId, sccsAnalysesSpecifications$sccsAnalysisList)[[1]]
    args <- list(fitSccsModelArgs = analysisRow$fitSccsModelArgs)
    args$fitSccsModelArgs$control$threads <- sccsMultiThreadingSettings$cvThreads
    sccsModelObjectsToCreate[[length(sccsModelObjectsToCreate) + 1]] <- list(
      args = args,
      sccsIntervalDataFileName = file.path(outputFolder, refRow$sccsIntervalDataFile),
      sccsModelFileName = file.path(outputFolder, sccsModelFile)
    )
  }

  referenceTable$loadId <- NULL
  referenceTable$studyPopId <- NULL
  attr(referenceTable, "loadConcepts") <- NULL
  saveRDS(referenceTable, file.path(outputFolder, "outcomeModelReference.rds"))
  saveRDS(sccsAnalysesSpecifications, file.path(outputFolder, "sccsAnalysesSpecifications.rds"))

  # Construction of objects -------------------------------------------------------------------------
  if (length(sccsDataObjectsToCreate) != 0) {
    message("*** Creating sccsData objects ***")
    cluster <- ParallelLogger::makeCluster(min(length(sccsDataObjectsToCreate), sccsMultiThreadingSettings$getDbSccsDataThreads))
    ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries")
    dummy <- ParallelLogger::clusterApply(cluster, sccsDataObjectsToCreate, createSccsDataObject)
    ParallelLogger::stopCluster(cluster)
  }


  if (length(studyPopFilesToCreate) != 0) {
    message("*** Creating studyPopulation objects ***")
    cluster <- ParallelLogger::makeCluster(min(length(studyPopFilesToCreate), sccsMultiThreadingSettings$createStudyPopulationThreads))
    ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries")
    dummy <- ParallelLogger::clusterApply(cluster, studyPopFilesToCreate, createStudyPopObject)
    ParallelLogger::stopCluster(cluster)
  }

  if (length(sccsIntervalDataObjectsToCreate) != 0) {
    message("*** Creating sccsIntervalData objects ***")
    cluster <- ParallelLogger::makeCluster(min(length(sccsIntervalDataObjectsToCreate), sccsMultiThreadingSettings$createIntervalDataThreads))
    ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries")
    dummy <- ParallelLogger::clusterApply(cluster, sccsIntervalDataObjectsToCreate, createSccsIntervalDataObject)
    ParallelLogger::stopCluster(cluster)
  }

  if (length(sccsModelObjectsToCreate) != 0) {
    message("*** Fitting models ***")
    cluster <- ParallelLogger::makeCluster(min(length(sccsModelObjectsToCreate), sccsMultiThreadingSettings$fitSccsModelThreads))
    ParallelLogger::clusterRequire(cluster, "SelfControlledCaseSeries")
    dummy <- ParallelLogger::clusterApply(cluster, sccsModelObjectsToCreate, createSccsModelObject)
    ParallelLogger::stopCluster(cluster)
  }

  mainFileName <- file.path(outputFolder, "resultsSummary.rds")
  if (!file.exists(mainFileName)) {
    message("*** Summarizing results ***")
    diagnosticsSummaryFileName <- file.path(outputFolder, "diagnosticsSummary.rds")
    summarizeResults(
      referenceTable = referenceTable,
      exposuresOutcomeList = sccsAnalysesSpecifications$exposuresOutcomeList,
      outputFolder = outputFolder,
      mainFileName = mainFileName,
      diagnosticsSummaryFileName = diagnosticsSummaryFileName,
      calibrationThreads = sccsMultiThreadingSettings$calibrationThreads,
      sccsDiagnosticThresholds = sccsAnalysesSpecifications$sccsDiagnosticThresholds,
      controlType = sccsAnalysesSpecifications$controlType
    )
  }

  invisible(referenceTable)
}

createReferenceTable <- function(sccsAnalysisList,
                                 exposuresOutcomeList,
                                 outputFolder,
                                 combineDataFetchAcrossOutcomes,
                                 analysesToExclude) {
  convertAnalysisToTable <- function(analysis) {
    tibble(
      analysisId = analysis$analysisId,
      analysisFolder = sprintf("Analysis_%d", analysis$analysisId)
    )
  }
  analyses <- bind_rows(lapply(sccsAnalysisList, convertAnalysisToTable))
  foldersToCreate <- file.path(outputFolder, analyses$analysisFolder)
  foldersToCreate <- foldersToCreate[!dir.exists(foldersToCreate)]
  sapply(foldersToCreate, dir.create)

  extractExposureIdRefs <- function(exposuresOutcome) {
    return(unlist(ParallelLogger::selectFromList(exposuresOutcome$exposures, "exposureIdRef")))
  }
  uniqueExposureIdRefs <- unique(unlist(sapply(exposuresOutcomeList, extractExposureIdRefs)))

  convertExposuresOutcomeToTable <- function(i) {
    exposuresOutcome <- exposuresOutcomeList[[i]]
    names <- c("exposuresOutcomeSetId", "exposuresOutcomeSetSeqId", "outcomeId", "nestingCohortId", uniqueExposureIdRefs)
    nestingCohortId <- if (is.null(exposuresOutcome$nestingCohortId)) NA else exposuresOutcome$nestingCohortId
    # Use hash of exposure IDs, outcome ID, and nesting ID to generate exposuresOutcomeSetId.
    # This should make ID consistent even when an analysis is distributed over various machines:
    hashString <- paste(exposuresOutcome$outcomeId,
                        nestingCohortId,
                        paste(sapply(exposuresOutcome$exposures, function(x) x$exposureId), collapse = " "))
    hash <- as.integer(as.numeric(paste0("0x", digest::digest(hashString, algo = "murmur32", serialize = FALSE)))-2^31)
    values <- c(hash, i, exposuresOutcome$outcomeId, nestingCohortId, rep(-1, length(uniqueExposureIdRefs)))
    for (exposure in exposuresOutcome$exposures) {
      idx <- which(uniqueExposureIdRefs == exposure$exposureIdRef)
      values[idx + 4] <- exposure$exposureId
    }
    names(values) <- names
    table <- as_tibble(t(values))
    return(table)
  }
  eos <- bind_rows(lapply(seq_along(exposuresOutcomeList), convertExposuresOutcomeToTable))
  if (any(duplicated(eos$exposuresOutcomeSetId))) {
    stop("Collision detected for exposuresOutcomeSetId. Are all exposures-outcome-nesting objects unique?")
  }

  referenceTable <- eos |>
    cross_join(analyses)

  # Determine if loading calls can be combined for efficiency ------------------

  # Instantiate loading settings per row in the reference table
  instantiateArgs <- function(i, sccsAnalysis) {
    exposureIds <- c()
    if (is.null(sccsAnalysis$getDbSccsDataArgs$exposureIds)) {
      exposureIds <- "all"
    } else {
      for (exposureId in sccsAnalysis$getDbSccsDataArgs$exposureIds) {
        if (is.character(exposureId)) {
          if (!exposureId %in% uniqueExposureIdRefs) {
            stop(paste0("The 'exposureIds' argument was set to '", exposureId, "' when calling createGetDbSccsDataArgs(), but this exposure label is not found in exposures-outcome sets"))
          }
          exposureIds <- c(exposureIds, referenceTable$exposureId[i])
        } else {
          exposureIds <- c(exposureIds, as.numeric(exposureId))
        }
      }
    }
    customCovariateIds <- sccsAnalysis$getDbSccsDataArgs$customCovariateIds
    for (customCovariateId in sccsAnalysis$getDbSccsDataArgs$customCovariateIds) {
      if (is.character(customCovariateId)) {
        if (!customCovariateId %in% uniqueExposureIdRefs) {
          stop(paste0("The 'customCovariateIds' argument was set to '", customCovariateId, "' when calling createGetDbSccsDataArgs(), but this exposure label is not found in exposures-outcome sets"))
        }
        customCovariateIds <- c(customCovariateIds, referenceTable[i, customCovariateId])
      } else {
        customCovariateIds <- c(customCovariateIds, customCovariateId)
      }
    }
    nestingCohortId <- referenceTable$nestingCohortId[i]
    if (is.na(nestingCohortId)) {
      nestingCohortId <- sccsAnalysis$getDbSccsDataArgs$nestingCohortId
    }
    instantiatedArgs <- sccsAnalysis$getDbSccsDataArgs$toList()
    instantiatedArgs$outcomeId <- referenceTable$outcomeId[i]
    instantiatedArgs$exposureIds <- exposureIds
    instantiatedArgs$customCovariateIds <- customCovariateIds
    instantiatedArgs$nestingCohortId <- nestingCohortId
    instantiatedArgs$rowId <- i
    return(instantiatedArgs)
  }
  instantiatedArgsPerRow <- vector(mode = "list", length = nrow(referenceTable))
  for (sccsAnalysis in sccsAnalysisList) {
    idx <- which(referenceTable$analysisId == sccsAnalysis$analysisId)
    instantiatedArgsPerRow[idx] <- lapply(idx,
                                          instantiateArgs,
                                          sccsAnalysis = sccsAnalysis)
  }

  # Group loads where possible
  if (combineDataFetchAcrossOutcomes) {
    loads <- ParallelLogger::selectFromList(
      instantiatedArgsPerRow,
      c(
        "nestingCohortId",
        "deleteCovariatesSmallCount",
        "studyStartDates",
        "studyEndDates",
        "maxCasesPerOutcome"
      )
    )
  } else {
    loads <- ParallelLogger::selectFromList(
      instantiatedArgsPerRow,
      c(
        "nestingCohortId",
        "deleteCovariatesSmallCount",
        "studyStartDates",
        "studyEndDates",
        "maxCasesPerOutcome",
        "outcomeId"
      )
    )
  }
  # Compute unions of concept sets
  loadStrings <- sapply(loads, jsonlite::toJSON)
  uniqueLoadStrings <- unique(loadStrings)
  referenceTable$sccsDataFile <- ""
  referenceTable$loadId <- NA
  loadConceptsPerLoad <- list()
  for (loadId in seq_along(uniqueLoadStrings)) {
    uniqueLoadString <- uniqueLoadStrings[[loadId]]
    rowIds <- which(loadStrings == uniqueLoadString)
    groupables <-instantiatedArgsPerRow[rowIds]
    outcomeIds <- unique(unlist(ParallelLogger::selectFromList(groupables, "outcomeId")))
    exposureIds <- lapply(groupables, function(x) x$exposureIds)
    if (any(exposureIds == "all")) {
      exposureIds <- "all"
    } else {
      exposureIds <- unique(do.call(c, exposureIds))
    }
    customCovariateIds <- unique(do.call(c, lapply(groupables, function(x) x$customCovariateIds)))
    loadConceptsPerLoad[[loadId]] <- list(
      exposureIds = unique(exposureIds),
      outcomeIds = unique(outcomeIds),
      customCovariateIds = unique(customCovariateIds),
      nestingCohortId = groupables[[1]]$nestingCohortId,
      deleteCovariatesSmallCount = groupables[[1]]$deleteCovariatesSmallCount,
      studyStartDates = groupables[[1]]$studyStartDates,
      studyEndDates = groupables[[1]]$studyEndDates,
      maxCasesPerOutcome = groupables[[1]]$maxCasesPerOutcome
    )
    sccsDataFileName <- .createSccsDataFileName(loadId)
    referenceTable$loadId[rowIds] <- loadId
    referenceTable$sccsDataFile[rowIds] <- sccsDataFileName
  }
  attr(referenceTable, "loadConceptsPerLoad") <- loadConceptsPerLoad

  # Add study population filenames --------------------------
  analysisIds <- unlist(ParallelLogger::selectFromList(sccsAnalysisList, "analysisId"))
  uniqueStudyPopArgs <- unique(ParallelLogger::selectFromList(sccsAnalysisList, "createStudyPopulationArgs"))
  uniqueStudyPopArgs <- lapply(uniqueStudyPopArgs, function(x) {
    return(x[[1]])
  })

  studyPopId <- sapply(
    sccsAnalysisList,
    function(sccsAnalysis, uniqueStudyPopArgs) {
      return(which.list(
        uniqueStudyPopArgs,
        sccsAnalysis$createStudyPopulationArgs
      ))
    },
    uniqueStudyPopArgs
  )
  restrictTimeToEraId <- sapply(
    sccsAnalysisList,
    function(sccsAnalysis) {
      if (("restrictTimeToEraId" %in% names(sccsAnalysis$createStudyPopulationArgs)) &&
          (is.character(sccsAnalysis$createStudyPopulationArgs$restrictTimeToEraId)))
        return(sccsAnalysis$createStudyPopulationArgs$restrictTimeToEraId)
      else  {
        return("")
      }
    }
  )
  analysisIdToStudyPopId <- tibble(analysisId = analysisIds, studyPopId = studyPopId, restrictTimeToEraId = restrictTimeToEraId)
  referenceTable <- inner_join(referenceTable, analysisIdToStudyPopId, by = join_by("analysisId"))
  referenceTable$restrictTimeToEraId <- sapply(seq_along(referenceTable$restrictTimeToEraId),
                                               function(i) {
                                                 id <- referenceTable$restrictTimeToEraId[i]
                                                 if (id =="") {
                                                   return(NA)
                                                 } else {
                                                   return(pull(referenceTable[i, id]))
                                                 }
                                               })
  referenceTable$studyPopFile <- .createStudyPopulationFileName(
    loadId = referenceTable$loadId,
    studyPopId = referenceTable$studyPopId,
    outcomeId = referenceTable$outcomeId,
    exposureId = referenceTable$restrictTimeToEraId
  )

  # Add interval data and model filenames -----------------------------------------------------
  generateFileName <- function(i) {
    return(.createSccsIntervalDataFileName(
      paste("Analysis_", referenceTable$analysisId[i], sep = ""),
      referenceTable$exposureId[i],
      referenceTable$outcomeId[i],
      referenceTable$nestingCohortId[i]
    ))
  }
  referenceTable$sccsIntervalDataFile <- generateFileName(1:nrow(referenceTable))

  generateFileName <- function(i) {
    return(.createSccsModelFileName(
      paste("Analysis_", referenceTable$analysisId[i], sep = ""),
      referenceTable$exposureId[i],
      referenceTable$outcomeId[i],
      referenceTable$nestingCohortId[i]
    ))
  }
  referenceTable$sccsModelFile <- generateFileName(1:nrow(referenceTable))

  # Remove rows that the user specified to exclude ---------------------------------
  if (!is.null(analysesToExclude) && nrow(analysesToExclude) != 0) {
    matchingColumns <- colnames(analysesToExclude)[colnames(analysesToExclude) %in% c("exposureId", "outcomeId", "analysisId", "nestingCohortId")]
    if (length(matchingColumns) == 0) {
      stop("The 'analysesToExclude' argument should contain columns 'exposureId', 'outcomeId', or 'analysisId'.")
    }
    analysesToExclude <- analysesToExclude[, matchingColumns]
    countBefore <- nrow(referenceTable)
    referenceTable <- referenceTable |>
      anti_join(analysesToExclude, by = matchingColumns)
    countAfter <- nrow(referenceTable)
    message(sprintf(
      "Removed %d of the %d exposure-outcome-analysis combinations as specified by the user.",
      countBefore - countAfter,
      countBefore
    ))
  }

  return(referenceTable)
}

which.list <- function(list, object) {
  return(do.call("c", lapply(1:length(list), function(i, list, object) {
    if (identical(list[[i]], object)) {
      return(i)
    } else {
      return(c())
    }
  }, list, object)))
}

getSccsData <- function(sccsDataFile) {
  if (mget("cachedSccsDataFile", envir = cache, ifnotfound = "") == sccsDataFile) {
    sccsData <- get("cachedSccsData", envir = cache)
    if (!Andromeda::isValidAndromeda(sccsData)) {
      sccsData <- loadSccsData(sccsDataFile)
      assign("cachedSccsData", sccsData, envir = cache)
    }
  } else {
    sccsData <- loadSccsData(sccsDataFile)
    assign("cachedSccsData", sccsData, envir = cache)
    assign("cachedSccsDataFile", sccsDataFile, envir = cache)
  }
  return(sccsData)
}

createSccsDataObject <- function(params) {
  sccsData <- do.call("getDbSccsData", params$args)
  saveSccsData(sccsData, params$sccsDataFileName)
  return(NULL)
}

createStudyPopObject <- function(params) {
  sccsData <- getSccsData(params$sccsDataFile)
  params$args$sccsData <- sccsData
  studyPopulation <- do.call("createStudyPopulation", params$args)
  saveRDS(studyPopulation, params$studyPopFile)
  return(NULL)
}

createSccsIntervalDataObject <- function(params) {
  sccsData <- getSccsData(params$sccsDataFileName)
  params$args$sccsData <- sccsData
  studyPopulation <- readRDS(params$studyPopFile)
  params$args$studyPopulation <- studyPopulation
  if (params$sccs) {
    sccsIntervalData <- do.call("createSccsIntervalData", params$args)
  } else {
    sccsIntervalData <- do.call("createScriIntervalData", params$args)
  }
  saveSccsIntervalData(sccsIntervalData, params$sccsIntervalDataFileName)
  return(NULL)
}

createSccsModelObject <- function(params) {
  sccsIntervalData <- loadSccsIntervalData(params$sccsIntervalDataFileName)
  params$args$sccsIntervalData <- sccsIntervalData
  sccsModel <- do.call("fitSccsModel", params$args)
  saveRDS(sccsModel, params$sccsModelFileName)
  return(NULL)
}

.createSccsDataFileName <- function(loadId) {
  name <- sprintf("SccsData_l%s.zip", loadId)
  return(name)
}

.f <- function(x) {
  return(format(x, scientific = FALSE, trim = TRUE))
}

.createStudyPopulationFileName <- function(loadId,
                                           studyPopId,
                                           outcomeId,
                                           exposureId) {
  name <- ifelse(is.na(exposureId),
                 sprintf("StudyPop_l%s_s%s_o%s.rds", loadId, studyPopId, .f(outcomeId)),
                 sprintf("StudyPop_l%s_s%s_o%s_e%s.rds", loadId, studyPopId, .f(outcomeId), .f(exposureId)))
  return(name)
}

.createSccsIntervalDataFileName <- function(analysisFolder, exposureId, outcomeId, nestingCohortId) {
  name <- if_else(is.na(nestingCohortId),
                  name <- sprintf("SccsIntervalData_e%s_o%s.zip", .f(exposureId), .f(outcomeId)),
                  name <- sprintf("SccsIntervalData_e%s_o%s_i%s.zip", .f(exposureId), .f(outcomeId), .f(nestingCohortId)))
  return(file.path(analysisFolder, name))
}

.createSccsModelFileName <- function(analysisFolder, exposureId, outcomeId, nestingCohortId) {
  name <- if_else(is.na(nestingCohortId),
                  name <- sprintf("SccsModel_e%s_o%s.rds", .f(exposureId), .f(outcomeId)),
                  name <- sprintf("SccsModel_e%s_o%s_i%s.rds", .f(exposureId), .f(outcomeId), .f(nestingCohortId)))
  return(file.path(analysisFolder, name))
}

.selectByType <- function(type, value, label) {
  if (is.null(type)) {
    if (is.list(value)) {
      stop(paste("Multiple ",
                 label,
                 "s specified, but none selected in analyses (comparatorType).",
                 sep = ""
      ))
    }
    return(value)
  } else {
    if (!is.list(value) || is.null(value[type])) {
      stop(paste(label, "type not found:", type))
    }
    return(value[type])
  }
}

.passBooleanToString <- function(pass) {
  case_when(
    is.na(pass) ~ "NOT EVALUATED",
    pass ~ "PASS",
    TRUE ~ "FAIL"
  )
}

# referenceTable = getFileReference(outputFolder)
summarizeResults <- function(referenceTable,
                             exposuresOutcomeList,
                             outputFolder,
                             mainFileName,
                             diagnosticsSummaryFileName,
                             calibrationThreads,
                             sccsDiagnosticThresholds,
                             controlType) {
  rows <- list()
  # i = 3
  pb <- txtProgressBar(style = 3)
  for (i in seq_len(nrow(referenceTable))) {
    refRow <- referenceTable[i, ]
    sccsModel <- readRDS(file.path(outputFolder, as.character(refRow$sccsModelFile)))
    attrition <- as.data.frame(sccsModel$metaData$attrition)
    attrition <- attrition[nrow(attrition), ]
    studyPop <- readRDS(file.path(outputFolder, as.character(refRow$studyPopFile)))
    timeStabilityDiagnostic <- checkTimeStabilityAssumption(
      studyPopulation = studyPop,
      sccsModel = sccsModel,
      maxRatio = sccsDiagnosticThresholds$timeTrendMaxRatio
    )
    eventExposureIndependenceDiagnostic <- checkEventExposureIndependenceAssumption(
      sccsModel = sccsModel,
      nullBounds = sccsDiagnosticThresholds$eventExposureDependenceNullBounds
    )
    # There could be multiple pre-exposure windows. Pick one, and make sure to pick a failing one
    # if exists:
    eventExposureIndependenceDiagnostic <- eventExposureIndependenceDiagnostic |>
      arrange(.data$pass) |>
      head(1)

    eventObservationIndependenceDiagnostic <- checkEventObservationIndependenceAssumption(
      sccsModel = sccsModel,
      nullBounds = sccsDiagnosticThresholds$eventObservationDependenceNullBounds
    )
    rareOutcomeDiagnostic <- checkRareOutcomeAssumption(
      studyPopulation = studyPop,
      maxPrevalence = sccsDiagnosticThresholds$rareOutcomeMaxPrevalence
    )
    # covariateSettings = sccsModel$metaData$covariateSettingsList[[1]]
    for (covariateSettings in sccsModel$metaData$covariateSettingsList) {
      if (covariateSettings$exposureOfInterest) {
        # j = 1
        for (j in seq_along(covariateSettings$outputIds)) {
          mdrr <- computeMdrr(object = sccsModel, exposureCovariateId = covariateSettings$outputIds[j])
          if (is.null(sccsModel$metaData$covariateStatistics)) {
            covariateStatistics <- tibble()
          } else {
            covariateStatistics <- sccsModel$metaData$covariateStatistics |>
              filter(.data$covariateId == covariateSettings$outputIds[j])
          }
          if (is.null(sccsModel$estimates)) {
            estimate <- tibble()
          } else {
            estimate <- sccsModel$estimates |>
              filter(.data$covariateId == covariateSettings$outputIds[j])
          }
          if (nrow(estimate) == 0) {
            p <- NA
            oneSidedP <- NA
          } else {
            p <- EmpiricalCalibration::computeTraditionalP(logRr = estimate$logRr,
                                                           seLogRr = estimate$seLogRr)
            oneSidedP <- EmpiricalCalibration::computeTraditionalP(logRr = estimate$logRr,
                                                                   seLogRr = estimate$seLogRr,
                                                                   twoSided = FALSE,
                                                                   upper = TRUE)
          }
          if (covariateSettings$eraIds[j] == -1) {
            exposure <- list(trueEffectSize = NA)
          } else {
            exposuresOutcome <- exposuresOutcomeList[[refRow$exposuresOutcomeSetSeqId]]
            exposure <- Filter(function(x) x$exposureId == covariateSettings$eraIds[j], exposuresOutcome$exposures)
            if (length(exposure) != 1) {
              stop(sprintf("Error finding exposure for covariate ID %d in analysis ID %d", covariateSettings$outputIds[j], refRow$analysisId))
            } else {
              exposure <- exposure[[1]]
            }
          }
          row <- tibble(
            exposuresOutcomeSetId = refRow$exposuresOutcomeSetId,
            nestingCohortId = refRow$nestingCohortId,
            outcomeId = refRow$outcomeId,
            analysisId = refRow$analysisId,
            covariateAnalysisId = covariateSettings$covariateAnalysisId,
            covariateId = covariateSettings$outputIds[j],
            covariateName = covariateSettings$label,
            eraId = covariateSettings$eraIds[j],
            trueEffectSize = exposure$trueEffectSize,
            outcomeSubjects = attrition$outcomeSubjects,
            outcomeEvents = attrition$outcomeEvents,
            outcomeObservationPeriods = attrition$outcomeObsPeriods,
            observedDays = attrition$observedDays,
            covariateSubjects = ifelse(nrow(covariateStatistics) == 0, 0, covariateStatistics$personCount),
            covariateDays = ifelse(nrow(covariateStatistics) == 0, 0, covariateStatistics$dayCount),
            covariateEras = ifelse(nrow(covariateStatistics) == 0, 0, covariateStatistics$eraCount),
            covariateOutcomes = ifelse(nrow(covariateStatistics) == 0, 0, covariateStatistics$outcomeCount),
            rr = ifelse(nrow(estimate) == 0, NA, exp(estimate$logRr)),
            ci95Lb = ifelse(nrow(estimate) == 0, NA, exp(estimate$logLb95)),
            ci95Ub = ifelse(nrow(estimate) == 0, NA, exp(estimate$logUb95)),
            p = p,
            oneSidedP = oneSidedP,
            logRr = ifelse(nrow(estimate) == 0, NA, estimate$logRr),
            seLogRr = ifelse(nrow(estimate) == 0, NA, estimate$seLogRr),
            llr = ifelse(nrow(estimate) == 0, NA, estimate$llr),
            timeStabilityP = timeStabilityDiagnostic$p,
            timeStabilityDiagnostic = .passBooleanToString(timeStabilityDiagnostic$pass),
            eventExposureLb = eventExposureIndependenceDiagnostic$lb,
            eventExposureUb = eventExposureIndependenceDiagnostic$ub,
            eventExposureDiagnostic = .passBooleanToString(eventExposureIndependenceDiagnostic$pass),
            eventObservationLb = eventObservationIndependenceDiagnostic$lb,
            eventObservationUb = eventObservationIndependenceDiagnostic$lb,
            eventObservationDiagnostic = .passBooleanToString(eventObservationIndependenceDiagnostic$pass),
            rareOutcomePrevalence = rareOutcomeDiagnostic$outcomeProportion,
            rareOutcomeDiagnostic = .passBooleanToString(rareOutcomeDiagnostic$pass),
            mdrr = mdrr$mdrr,
            mdrrDiagnostic = .passBooleanToString(mdrr < sccsDiagnosticThresholds$mdrrThreshold)
          )
          rows[[length(rows) + 1]] <- row
        }
      }
    }
    setTxtProgressBar(pb, i / nrow(referenceTable))
  }
  close(pb)
  allResults <- bind_rows(rows)
  allResults <- calibrateEstimates(
    results = allResults,
    calibrationThreads = calibrationThreads,
    controlType = controlType
  )
  resultsSummary <- allResults |>
    select("exposuresOutcomeSetId",
           "nestingCohortId",
           "outcomeId",
           "analysisId",
           "covariateAnalysisId",
           "covariateId",
           "covariateName",
           "eraId",
           "trueEffectSize",
           "outcomeSubjects",
           "outcomeEvents",
           "outcomeObservationPeriods",
           "observedDays",
           "covariateSubjects",
           "covariateDays",
           "covariateEras",
           "covariateOutcomes",
           "rr",
           "ci95Lb",
           "ci95Ub",
           "p",
           "oneSidedP",
           "logRr",
           "seLogRr",
           "llr",
           "calibratedRr",
           "calibratedCi95Lb",
           "calibratedCi95Ub",
           "calibratedP",
           "calibratedOneSidedP",
           "calibratedLogRr",
           "calibratedSeLogRr")
  saveRDS(resultsSummary, mainFileName)

  diagnosticsSummary <- allResults |>
    mutate(easeDiagnostic = .passBooleanToString(.data$ease < sccsDiagnosticThresholds$easeThreshold)) |>
    mutate(unblindForEvidenceSynthesis = .data$unblindForCalibration & .data$easeDiagnostic != "FAIL") |>
    mutate(unblind = .data$unblindForEvidenceSynthesis & .data$mdrrDiagnostic != "FAIL") |>
    select("exposuresOutcomeSetId",
           "nestingCohortId",
           "outcomeId",
           "analysisId",
           "covariateAnalysisId",
           "covariateId",
           "covariateName",
           "eraId",
           "timeStabilityP",
           "timeStabilityDiagnostic",
           "eventExposureLb",
           "eventExposureUb",
           "eventExposureDiagnostic",
           "eventObservationLb",
           "eventObservationUb",
           "eventObservationDiagnostic",
           "rareOutcomePrevalence",
           "rareOutcomeDiagnostic",
           "mdrr",
           "mdrrDiagnostic",
           "ease",
           "easeDiagnostic",
           "unblind",
           "unblindForEvidenceSynthesis")
  saveRDS(diagnosticsSummary, diagnosticsSummaryFileName)
}

calibrateEstimates <- function(results, calibrationThreads, diagnosticsSummary = diagnosticsSummary, controlType) {
  if (nrow(results) == 0) {
    return(results)
  }
  message("Calibrating estimates")

  results <- results |>
    mutate(unblindForCalibration = .data$timeStabilityDiagnostic != "FAIL" &
             .data$eventExposureDiagnostic != "FAIL" &
             .data$eventObservationDiagnostic != "FAIL" &
             .data$rareOutcomeDiagnostic != "FAIL")
  if (controlType == "outcome") {
    groups <- split(results, paste(results$eraId, results$nestingCohortId, results$covariateId, results$analysisId))
  } else {
    groups <- split(results, paste(results$outcomeId, results$covariateId, results$analysisId))
  }
  cluster <- ParallelLogger::makeCluster(min(length(groups), calibrationThreads))
  results <- ParallelLogger::clusterApply(cluster, groups, calibrateGroup)
  ParallelLogger::stopCluster(cluster)
  results <- bind_rows(results)
  return(results)
}

# group = groups[[1]]
calibrateGroup <- function(group) {
  ncs <- group[!is.na(group$trueEffectSize) & group$trueEffectSize == 1 & !is.na(group$seLogRr) & group$unblindForCalibration, ]
  pcs <- group[!is.na(group$trueEffectSize) & group$trueEffectSize != 1 & !is.na(group$seLogRr) & group$unblindForCalibration, ]
  if (nrow(ncs) >= 5) {
    null <- EmpiricalCalibration::fitMcmcNull(logRr = ncs$logRr, seLogRr = ncs$seLogRr)
    ease <- EmpiricalCalibration::computeExpectedAbsoluteSystematicError(null)
    calibratedP <- EmpiricalCalibration::calibrateP(null = null,
                                                    logRr = group$logRr,
                                                    seLogRr = group$seLogRr)
    calibratedOneSidedP <- EmpiricalCalibration::calibrateP(null = null,
                                                            logRr = group$logRr,
                                                            seLogRr = group$seLogRr,
                                                            twoSided = FALSE,
                                                            upper = TRUE)
    if (nrow(pcs) >= 5) {
      model <- EmpiricalCalibration::fitSystematicErrorModel(
        logRr = c(ncs$logRr, pcs$logRr),
        seLogRr = c(ncs$seLogRr, pcs$seLogRr),
        trueLogRr = log(c(ncs$trueEffectSize, pcs$trueEffectSize)),
        estimateCovarianceMatrix = FALSE
      )
    } else {
      model <- EmpiricalCalibration::convertNullToErrorModel(null)
    }
    calibratedCi <- EmpiricalCalibration::calibrateConfidenceInterval(model = model, logRr = group$logRr, seLogRr = group$seLogRr)
    group$calibratedRr <- exp(calibratedCi$logRr)
    group$calibratedCi95Lb <- exp(calibratedCi$logLb95Rr)
    group$calibratedCi95Ub <- exp(calibratedCi$logUb95Rr)
    group$calibratedP <- calibratedP$p
    group$calibratedOneSidedP <- calibratedOneSidedP$p
    group$calibratedLogRr <- calibratedCi$logRr
    group$calibratedSeLogRr <- calibratedCi$seLogRr
    group$ease <- ease$ease
  } else {
    group$calibratedRr <- NA
    group$calibratedCi95Lb <- NA
    group$calibratedCi95Ub <- NA
    group$calibratedP <- NA
    group$calibratedOneSidedP <- NA
    group$calibratedLogRr <- NA
    group$calibratedSeLogRr <- NA
    group$ease <- NA
  }
  return(group)
}


#' Get file reference
#'
#' @param outputFolder       Name of the folder where all the outputs have been written to.
#'
#' @return
#' A tibble containing the names of the files for various artifacts created for each outcome-exposures set.
#'
#' @export
getFileReference <- function(outputFolder) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertCharacter(outputFolder, len = 1, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)
  outputFolder <- normalizePath(outputFolder)
  omr <- readRDS(file.path(outputFolder, "outcomeModelReference.rds"))
  return(omr)
}

#' Get a summary report of the analyses results
#'
#' @param outputFolder       Name of the folder where all the outputs have been written to.
#'
#' @return
#' A tibble containing summary statistics for each outcome-covariate-analysis combination.
#'
#' @export
getResultsSummary <- function(outputFolder) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertCharacter(outputFolder, len = 1, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)
  outputFolder <- normalizePath(outputFolder)
  results <- readRDS(file.path(outputFolder, "resultsSummary.rds"))
  return(results)
}

#' Get a summary report of the analyses diagnostics
#'
#' @param outputFolder       Name of the folder where all the outputs have been written to.
#'
#' @return
#' A tibble containing summary diagnostics for each outcome-covariate-analysis combination.
#'
#' @export
getDiagnosticsSummary <- function(outputFolder) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertCharacter(outputFolder, len = 1, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)
  outputFolder <- normalizePath(outputFolder)
  results <- readRDS(file.path(outputFolder, "diagnosticsSummary.rds"))
  return(results)
}

Try the SelfControlledCaseSeries package in your browser

Any scripts or data that you put into this service are public.

SelfControlledCaseSeries documentation built on Aug. 8, 2025, 7:34 p.m.