R/Table1.R

Defines functions createTable1CovariateSettings createTable1 getDefaultTable1Specifications

Documented in createTable1 createTable1CovariateSettings getDefaultTable1Specifications

# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of FeatureExtraction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get the default table 1 specifications
#'
#' @description
#' Loads the default specifications for a table 1, to be used with the \code{\link{createTable1}}
#' function.
#'
#' @return
#' A specifications objects.
#'
#' @examples
#' \dontrun{
#' defaultTable1Specs <- getDefaultTable1Specifications()
#' }
#'
#' @export
getDefaultTable1Specifications <- function() {
  fileName <- system.file("csv", "Table1Specs.csv", package = "FeatureExtraction")
  # Workaround for issue https://github.com/tidyverse/vroom/issues/519:
  readr::local_edition(1)
  colTypes <- list(label = readr::col_character(), analysisId = readr::col_integer(), covariateIds = readr::col_character())
  specifications <- readr::read_csv(fileName, col_types = colTypes)
  return(specifications)
}

#' Create a table 1
#'
#' @description
#' Creates a formatted table of cohort characteristics, to be included in publications or reports.
#' Allows for creating a table describing a single cohort, or a table comparing two cohorts.
#'
#' @param covariateData1   The covariate data of the cohort to be included in the table.
#' @param covariateData2   The covariate data of the cohort to also be included, when comparing two
#'                         cohorts.
#' @param cohortId1        If provided, \code{covariateData1} will be restricted to this cohort. If not
#'                         provided, \code{covariateData1} is assumed to contain data on only 1 cohort.
#' @param cohortId2        If provided, \code{covariateData2} will be restricted to this cohort. If not
#'                         provided, \code{covariateData2} is assumed to contain data on only 1 cohort.
#' @param specifications   Specifications of which covariates to display, and how.
#' @param output           The output format for the table. Options are \code{output = "two columns"},
#'                         \code{output = "one column"}, or \code{output = "list"}.
#' @param showCounts       Show the number of cohort entries having the binary covariate?
#' @param showPercent      Show the percentage of cohort entries having the binary covariate?
#' @param percentDigits    Number of digits to be used for percentages.
#' @param stdDiffDigits    Number of digits to be used for the standardized differences.
#' @param valueDigits      Number of digits to be used for the values of continuous variables.
#'
#' @return
#' A data frame, or, when \code{output = "list"} a list of two data frames.
#'
#' @examples
#' \dontrun{
#' eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails()
#' covSettings <- createDefaultCovariateSettings()
#' Eunomia::createCohorts(
#'   connectionDetails = eunomiaConnectionDetails,
#'   cdmDatabaseSchema = "main",
#'   cohortDatabaseSchema = "main",
#'   cohortTable = "cohort"
#' )
#' covData1 <- getDbCovariateData(
#'   connectionDetails = eunomiaConnectionDetails,
#'   oracleTempSchema = NULL,
#'   cdmDatabaseSchema = "main",
#'   cdmVersion = "5",
#'   cohortTable = "cohort",
#'   cohortDatabaseSchema = "main",
#'   cohortTableIsTemp = FALSE,
#'   cohortId = 1,
#'   rowIdField = "subject_id",
#'   covariateSettings = covSettings,
#'   aggregated = TRUE
#' )
#' covData2 <- getDbCovariateData(
#'   connectionDetails = eunomiaConnectionDetails,
#'   oracleTempSchema = NULL,
#'   cdmDatabaseSchema = "main",
#'   cdmVersion = "5",
#'   cohortTable = "cohort",
#'   cohortDatabaseSchema = "main",
#'   cohortTableIsTemp = FALSE,
#'   cohortId = 2,
#'   rowIdField = "subject_id",
#'   covariateSettings = covSettings,
#'   aggregated = TRUE
#' )
#' table1 <- createTable1(
#'   covariateData1 = covData1,
#'   covariateData2 = covData2,
#'   cohortId1 = 1,
#'   cohortId2 = 2,
#'   specifications = getDefaultTable1Specifications(),
#'   output = "one column",
#'   showCounts = FALSE,
#'   showPercent = TRUE,
#'   percentDigits = 1,
#'   valueDigits = 1,
#'   stdDiffDigits = 2
#' )
#' }
#'
#' @export
createTable1 <- function(covariateData1,
                         covariateData2 = NULL,
                         cohortId1 = NULL,
                         cohortId2 = NULL,
                         specifications = getDefaultTable1Specifications(),
                         output = "two columns",
                         showCounts = FALSE,
                         showPercent = TRUE,
                         percentDigits = 1,
                         valueDigits = 1,
                         stdDiffDigits = 2) {
  comparison <- !is.null(covariateData2)
  if (!isCovariateData(covariateData1)) {
    stop("covariateData1 is not of type 'covariateData'")
  }
  if (comparison && !isCovariateData(covariateData2)) {
    stop("covariateData2 is not of type 'covariateData'")
  }
  if (!isAggregatedCovariateData(covariateData1)) {
    stop("Covariate1 data is not aggregated")
  }
  if (comparison && !isAggregatedCovariateData(covariateData2)) {
    stop("Covariate2 data is not aggregated")
  }
  if (!showCounts && !showPercent) {
    stop("Must show counts or percent, or both")
  }
  if (!(output %in% c("one column", "two columns", "list"))) {
    stop("The `output` argument  must be 'one column', 'two columns', or 'list'")
  }

  fixCase <- function(label) {
    idx <- (toupper(label) == label)
    if (any(idx)) {
      label[idx] <- paste0(
        substr(label[idx], 1, 1),
        tolower(substr(label[idx], 2, nchar(label[idx])))
      )
    }
    return(label)
  }

  formatCount <- function(x) {
    result <- format(round(x), justify = "right", big.mark = ",")
    result <- gsub("NA", "", result)
    result <- gsub(" ", " ", result)
    return(result)
  }

  formatPercent <- function(x) {
    result <- format(round(100 * x, percentDigits), digits = percentDigits + 1, justify = "right")
    result <- gsub("NA", "", result)
    result <- gsub(" ", " ", result)
    return(result)
  }

  formatStdDiff <- function(x) {
    result <- format(round(x, stdDiffDigits), digits = stdDiffDigits + 1, justify = "right")
    result <- gsub("NA", "", result)
    result <- gsub(" ", " ", result)
    return(result)
  }

  formatValue <- function(x) {
    return(format(round(x, valueDigits), nsmall = valueDigits))
  }

  if (is.null(covariateData1$covariates)) {
    covariates <- NULL
  } else {
    covariates <- covariateData1$covariates
    if (!is.null(cohortId1)) {
      covariates <- covariates %>%
        filter(.data$cohortDefinitionId == cohortId1)
    }
    covariates <- covariates %>%
      select(
        covariateId = "covariateId",
        count1 = "sumValue",
        percent1 = "averageValue"
      ) %>%
      collect()
    covariates$count1 <- formatCount(covariates$count1)
    covariates$percent1 <- formatPercent(covariates$percent1)
  }
  if (is.null(covariateData1$covariatesContinuous)) {
    covariatesContinuous <- NULL
  } else {
    covariatesContinuous <- covariateData1$covariatesContinuous
    if (!is.null(cohortId1)) {
      covariatesContinuous <- covariatesContinuous %>%
        filter(.data$cohortDefinitionId == cohortId1)
    }
    covariatesContinuous <- covariatesContinuous %>%
      select(
        covariateId = "covariateId",
        averageValue1 = "averageValue",
        standardDeviation1 = "standardDeviation",
        minValue1 = "minValue",
        p25Value1 = "p25Value",
        medianValue1 = "medianValue",
        p75Value1 = "p75Value",
        maxValue1 = "maxValue"
      ) %>%
      collect()
    covariatesContinuous$averageValue1 <- formatValue(covariatesContinuous$averageValue1)
    covariatesContinuous$standardDeviation1 <- formatValue(covariatesContinuous$standardDeviation1)
    covariatesContinuous$minValue1 <- formatValue(covariatesContinuous$minValue1)
    covariatesContinuous$p25Value1 <- formatValue(covariatesContinuous$p25Value1)
    covariatesContinuous$medianValue1 <- formatValue(covariatesContinuous$medianValue1)
    covariatesContinuous$p75Value1 <- formatValue(covariatesContinuous$p75Value1)
    covariatesContinuous$maxValue1 <- formatValue(covariatesContinuous$maxValue1)
  }

  covariateRef <- covariateData1$covariateRef %>%
    collect()
  analysisRef <- covariateData1$analysisRef %>%
    collect()
  if (comparison) {
    stdDiff <- computeStandardizedDifference(
      covariateData1 = covariateData1,
      covariateData2 = covariateData2,
      cohortId1 = cohortId1,
      cohortId2 = cohortId2
    )
    if (!is.null(covariateData1$covariates) && !is.null(covariateData2$covariates)) {
      tempCovariates <- covariateData2$covariates
      if (!is.null(cohortId2)) {
        tempCovariates <- tempCovariates %>%
          filter(.data$cohortDefinitionId == cohortId2)
      }
      tempCovariates <- tempCovariates %>%
        select(
          covariateId = "covariateId",
          count2 = "sumValue",
          percent2 = "averageValue"
        ) %>%
        collect()
      tempCovariates$count2 <- formatCount(tempCovariates$count2)
      tempCovariates$percent2 <- formatPercent(tempCovariates$percent2)
      covariates <- merge(covariates, tempCovariates, all = TRUE)
      covariates$count1[is.na(covariates$count1)] <- " 0"
      covariates$count2[is.na(covariates$count2)] <- " 0"
      covariates$percent1[is.na(covariates$percent1)] <- " 0"
      covariates$percent2[is.na(covariates$percent2)] <- " 0"
      covariates <- merge(covariates, stdDiff[, c("covariateId", "stdDiff")])
      covariates$stdDiff <- formatStdDiff(covariates$stdDiff)
    }
    if (!is.null(covariatesContinuous)) {
      tempCovariates <- covariateData2$covariatesContinuous
      if (!is.null(cohortId2)) {
        tempCovariates <- tempCovariates %>%
          filter(.data$cohortDefinitionId == cohortId2)
      }

      tempCovariates <- tempCovariates %>%
        select(
          covariateId = "covariateId",
          averageValue2 = "averageValue",
          standardDeviation2 = "standardDeviation",
          minValue2 = "minValue",
          p25Value2 = "p25Value",
          medianValue2 = "medianValue",
          p75Value2 = "p75Value",
          maxValue2 = "maxValue"
        ) %>%
        collect()

      tempCovariates$averageValue2 <- formatValue(tempCovariates$averageValue2)
      tempCovariates$standardDeviation2 <- formatValue(tempCovariates$standardDeviation2)
      tempCovariates$minValue2 <- formatValue(tempCovariates$minValue2)
      tempCovariates$p25Value2 <- formatValue(tempCovariates$p25Value2)
      tempCovariates$medianValue2 <- formatValue(tempCovariates$medianValue2)
      tempCovariates$p75Value2 <- formatValue(tempCovariates$p75Value2)
      tempCovariates$maxValue2 <- formatValue(tempCovariates$maxValue2)
      covariatesContinuous <- merge(covariatesContinuous, tempCovariates, all = TRUE)
      covariatesContinuous$averageValue1[is.na(covariatesContinuous$averageValue1)] <- "  "
      covariatesContinuous$standardDeviation1[is.na(covariatesContinuous$standardDeviation1)] <- "  "
      covariatesContinuous$minValue1[is.na(covariatesContinuous$minValue1)] <- "  "
      covariatesContinuous$p25Value1[is.na(covariatesContinuous$p25Value1)] <- "  "
      covariatesContinuous$medianValue1[is.na(covariatesContinuous$medianValue1)] <- "  "
      covariatesContinuous$p75Value1[is.na(covariatesContinuous$p75Value1)] <- "  "
      covariatesContinuous$maxValue1[is.na(covariatesContinuous$maxValue1)] <- "  "
      covariatesContinuous$averageValue2[is.na(covariatesContinuous$averageValue2)] <- "  "
      covariatesContinuous$standardDeviation2[is.na(covariatesContinuous$standardDeviation2)] <- "  "
      covariatesContinuous$minValue2[is.na(covariatesContinuous$minValue2)] <- "  "
      covariatesContinuous$p25Value2[is.na(covariatesContinuous$p25Value2)] <- "  "
      covariatesContinuous$medianValue2[is.na(covariatesContinuous$medianValue2)] <- "  "
      covariatesContinuous$p75Value2[is.na(covariatesContinuous$p75Value2)] <- "  "
      covariatesContinuous$maxValue2[is.na(covariatesContinuous$maxValue2)] <- "  "
      covariatesContinuous <- merge(covariatesContinuous, stdDiff[, c("covariateId", "stdDiff")])
      covariatesContinuous$stdDiff <- formatStdDiff(covariatesContinuous$stdDiff)
    }
    covariateRef <- unique(bind_rows(covariateRef, collect(covariateData2$covariateRef)))
  } else {
    covariates$count2 <- " 0"
    covariates$percent2 <- " 0"
    covariates$stdDiff <- " 0"
    covariatesContinuous$averageValue2 <- "  "
    covariatesContinuous$standardDeviation2 <- "  "
    covariatesContinuous$minValue2 <- "  "
    covariatesContinuous$p25Value2 <- "  "
    covariatesContinuous$medianValue2 <- "  "
    covariatesContinuous$p75Value2 <- "  "
    covariatesContinuous$maxValue2 <- "  "
    covariatesContinuous$stdDiff <- "  "
  }

  binaryTable <- tibble()
  continuousTable <- tibble()
  for (i in 1:nrow(specifications)) {
    if (is.na(specifications$analysisId[i])) {
      binaryTable <- bind_rows(
        binaryTable,
        tibble(Characteristic = specifications$label[i], value = "")
      )
    } else {
      idx <- analysisRef$analysisId == specifications$analysisId[i]
      if (any(idx)) {
        isBinary <- analysisRef$isBinary[idx]
        covariateIds <- NULL
        if (isBinary == "Y") {
          # Binary
          if (is.na(specifications$covariateIds[i])) {
            idx <- covariateRef$analysisId == specifications$analysisId[i]
          } else {
            covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]])
            idx <- covariateRef$covariateId %in% covariateIds
          }
          if (any(idx)) {
            covariateRefSubset <- covariateRef[idx, ]
            covariatesSubset <- merge(covariates, covariateRefSubset)
            if (is.null(covariateIds)) {
              covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ]
            } else {
              covariatesSubset <- merge(covariatesSubset, tibble(
                covariateId = covariateIds,
                rn = 1:length(covariateIds)
              ))
              covariatesSubset <- covariatesSubset[order(
                covariatesSubset$rn,
                covariatesSubset$covariateId
              ), ]
            }
            covariatesSubset$covariateName <- fixCase(gsub(
              "^.*: ",
              "",
              covariatesSubset$covariateName
            ))
            if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) {
              binaryTable <- bind_rows(binaryTable, tibble(
                Characteristic = specifications$label[i],
                count1 = "",
                percent1 = "",
                count2 = "",
                percent2 = "",
                stdDiff = ""
              ))
              binaryTable <- bind_rows(
                binaryTable,
                tibble(
                  Characteristic = paste0("  ", covariatesSubset$covariateName),
                  count1 = covariatesSubset$count1,
                  percent1 = covariatesSubset$percent1,
                  count2 = covariatesSubset$count2,
                  percent2 = covariatesSubset$percent2,
                  stdDiff = covariatesSubset$stdDiff
                )
              )
            } else {
              binaryTable <- bind_rows(binaryTable, tibble(
                Characteristic = specifications$label[i],
                count1 = covariatesSubset$count1,
                percent1 = covariatesSubset$percent1,
                count2 = covariatesSubset$count2,
                percent2 = covariatesSubset$percent2,
                stdDiff = covariatesSubset$stdDiff
              ))
            }
          }
        } else {
          # Not binary
          if (is.na(specifications$covariateIds[i])) {
            idx <- covariateRef$analysisId == specifications$analysisId[i]
          } else {
            covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]])
            idx <- covariateRef$covariateId %in% covariateIds
          }
          if (any(idx)) {
            covariateRefSubset <- covariateRef[idx, ]
            covariatesSubset <- covariatesContinuous[covariatesContinuous$covariateId %in% covariateRefSubset$covariateId, ]
            covariatesSubset <- merge(covariatesSubset, covariateRefSubset)
            if (is.null(covariateIds)) {
              covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ]
            } else {
              covariatesSubset <- merge(covariatesSubset, tibble(
                covariateId = covariateIds,
                rn = 1:length(covariateIds)
              ))
              covariatesSubset <- covariatesSubset[order(
                covariatesSubset$rn,
                covariatesSubset$covariateId
              ), ]
            }
            covariatesSubset$covariateName <- fixCase(gsub(
              "^.*: ",
              "",
              covariatesSubset$covariateName
            ))
            if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) {
              continuousTable <- bind_rows(
                continuousTable,
                tibble(
                  Characteristic = specifications$label[i],
                  value1 = "",
                  value2 = "",
                  stdDiff = ""
                )
              )
              for (j in 1:nrow(covariatesSubset)) {
                continuousTable <- bind_rows(
                  continuousTable,
                  tibble(
                    Characteristic = paste0("  ", covariatesSubset$covariateName[j]),
                    value1 = "",
                    value2 = "",
                    stdDiff = ""
                  )
                )
                continuousTable <- bind_rows(continuousTable, tibble(
                  Characteristic = c(
                    "    Mean",
                    "    Std. deviation",
                    "    Minimum",
                    "    25th percentile",
                    "    Median",
                    "    75th percentile",
                    "    Maximum"
                  ),
                  value1 = c(
                    covariatesSubset$averageValue1[j],
                    covariatesSubset$standardDeviation1[j],
                    covariatesSubset$minValue1[j],
                    covariatesSubset$p25Value1[j],
                    covariatesSubset$medianValue1[j],
                    covariatesSubset$p75Value1[j],
                    covariatesSubset$maxValue1[j]
                  ),
                  value2 = c(
                    covariatesSubset$averageValue2[j],
                    covariatesSubset$standardDeviation2[j],
                    covariatesSubset$minValue2[j],
                    covariatesSubset$p25Value2[j],
                    covariatesSubset$medianValue2[j],
                    covariatesSubset$p75Value2[j],
                    covariatesSubset$maxValue2[j]
                  ),
                  stdDiff = c(
                    covariatesSubset$stdDiff[j],
                    "  ",
                    "  ",
                    "  ",
                    "  ",
                    "  ",
                    "  "
                  )
                ))
              }
            } else {
              continuousTable <- bind_rows(
                continuousTable,
                tibble(
                  Characteristic = specifications$label[i],
                  value1 = "",
                  value2 = "",
                  stdDiff = ""
                )
              )
              continuousTable <- bind_rows(continuousTable, tibble(
                Characteristic = c(
                  "    Mean",
                  "    Std. deviation",
                  "    Minimum",
                  "    25th percentile",
                  "    Median",
                  "    75th percentile",
                  "    Maximum"
                ),
                value1 = c(
                  covariatesSubset$averageValue1,
                  covariatesSubset$standardDeviation1,
                  covariatesSubset$minValue1,
                  covariatesSubset$p25Value1,
                  covariatesSubset$medianValue1,
                  covariatesSubset$p75Value1,
                  covariatesSubset$maxValue1
                ),
                value2 = c(
                  covariatesSubset$averageValue2,
                  covariatesSubset$standardDeviation2,
                  covariatesSubset$minValue2,
                  covariatesSubset$p25Value2,
                  covariatesSubset$medianValue2,
                  covariatesSubset$p75Value2,
                  covariatesSubset$maxValue2
                ),
                stdDiff = c(
                  covariatesSubset$stdDiff,
                  "  ",
                  "  ",
                  "  ",
                  "  ",
                  "  ",
                  "  "
                )
              ))
            }
          }
        }
      }
    }
  }
  if (nrow(continuousTable) != 0) {
    if (showCounts && showPercent) {
      if (comparison) {
        continuousTable$dummy1 <- ""
        continuousTable$dummy2 <- ""
        continuousTable <- continuousTable[, c(1, 5, 2, 6, 3, 4)]
        colnames(continuousTable) <- c("Characteristic", "", "Value", "", "Value", "Std.Diff")
      } else {
        continuousTable$dummy <- ""
        continuousTable <- continuousTable[, c(1, 3, 2)]
        colnames(continuousTable) <- c("Characteristic", "", "Value")
      }
    } else {
      if (comparison) {
        colnames(continuousTable) <- c("Characteristic", "Value", "Value", "Std.Diff")
      } else {
        continuousTable$value2 <- NULL
        continuousTable$stdDiff <- NULL
        colnames(continuousTable) <- c("Characteristic", "Value")
      }
    }
  }

  if (nrow(binaryTable) != 0) {
    if (comparison) {
      colnames(binaryTable) <- c(
        "Characteristic",
        "Count",
        paste0(
          "% (n = ",
          formatCount(attr(covariateData1, "metaData")$populationSize),
          ")"
        ),
        "Count",
        paste0(
          "% (n = ",
          formatCount(attr(covariateData2, "metaData")$populationSize),
          ")"
        ),
        "Std.Diff"
      )
      if (!showCounts) {
        binaryTable[, 4] <- NULL
        binaryTable[, 2] <- NULL
      }
      if (!showPercent) {
        binaryTable[, 5] <- NULL
        binaryTable[, 3] <- NULL
      }
    } else {
      binaryTable$count2 <- NULL
      binaryTable$percent2 <- NULL
      binaryTable$stdDiff <- NULL
      colnames(binaryTable) <- c(
        "Characteristic",
        "Count",
        paste0(
          "% (n = ",
          formatCount(attr(covariateData1, "metaData")$populationSize),
          ")"
        )
      )
      if (!showCounts) {
        binaryTable[, 2] <- NULL
      }
      if (!showPercent) {
        binaryTable[, 3] <- NULL
      }
    }
  }

  if (output == "two columns") {
    if (nrow(binaryTable) > nrow(continuousTable)) {
      if (nrow(continuousTable) > 0) {
        rowsPerColumn <- ceiling((nrow(binaryTable) + nrow(continuousTable) + 2) / 2)
        column1 <- binaryTable[1:rowsPerColumn, ]
        ct <- continuousTable
        colnames(ct) <- colnames(binaryTable)
        column2 <- rbind(
          binaryTable[(rowsPerColumn + 1):nrow(binaryTable), ],
          rep("", ncol(binaryTable)),
          colnames(continuousTable),
          ct
        )
      } else {
        rowsPerColumn <- ceiling((nrow(binaryTable) + nrow(continuousTable)) / 2)
        column1 <- binaryTable[1:rowsPerColumn, ]
        column2 <- binaryTable[(rowsPerColumn + 1):nrow(binaryTable), ]
      }
      if (nrow(column1) > nrow(column2)) {
        column2 <- rbind(column2, rep("", ncol(binaryTable)))
      }
      result <- cbind(column1, column2)
    } else {
      rlang::abort(paste(
        "createTable1 cannot display the output in two columns because there are more rows in the table of continuous covariates than there are in the table of binary covariates.",
        "\nTry using `output = 'one column'` when calling createTable1()"
      ))
    }
  } else if (output == "one column") {
    ct <- continuousTable
    colnames(ct) <- colnames(binaryTable)
    result <- rbind(
      binaryTable,
      rep("", ncol(binaryTable)),
      colnames(continuousTable),
      ct
    )
  } else {
    result <- list(part1 = binaryTable, part2 = continuousTable)
  }
  return(result)
}

#' Create covariate settings for a table 1
#'
#' @description
#' Creates a covariate settings object for generating only those covariates that will be included in a
#' table 1. This function works by filtering the \code{covariateSettings} object for the covariates in
#' the \code{specifications} object.
#'
#' @param specifications                A specifications object for generating a table using the
#'                                      \code{\link{createTable1}} function.
#' @param covariateSettings             The covariate settings object to use as the basis for the
#'                                      filtered covariate settings.
#' @param includedCovariateConceptIds   A list of concept IDs that should be used to construct
#'                                      covariates.
#' @param addDescendantsToInclude       Should descendant concept IDs be added to the list of concepts
#'                                      to include?
#' @param excludedCovariateConceptIds   A list of concept IDs that should NOT be used to construct
#'                                      covariates.
#' @param addDescendantsToExclude       Should descendant concept IDs be added to the list of concepts
#'                                      to exclude?
#' @param includedCovariateIds          A list of covariate IDs that should be restricted to.
#'
#' @return
#' A covariate settings object, for example to be used when calling the
#' \code{\link{getDbCovariateData}} function.
#'
#' @examples
#' \dontrun{
#' table1CovSettings <- createTable1CovariateSettings(
#'   specifications = getDefaultTable1Specifications(),
#'   covariateSettings = createDefaultCovariateSettings(),
#'   includedCovariateConceptIds = c(),
#'   addDescendantsToInclude = FALSE,
#'   excludedCovariateConceptIds = c(),
#'   addDescendantsToExclude = FALSE,
#'   includedCovariateIds = c()
#' )
#' }
#' @export
createTable1CovariateSettings <- function(specifications = getDefaultTable1Specifications(),
                                          covariateSettings = createDefaultCovariateSettings(),
                                          includedCovariateConceptIds = c(),
                                          addDescendantsToInclude = FALSE,
                                          excludedCovariateConceptIds = c(),
                                          addDescendantsToExclude = FALSE,
                                          includedCovariateIds = c()) {
  covariateSettings <- convertPrespecSettingsToDetailedSettings(covariateSettings)
  filterBySpecs <- function(analysis) {
    if (analysis$analysisId %in% specifications$analysisId) {
      covariateIds <- specifications$covariateIds[specifications$analysisId == analysis$analysisId]
      if (!any(is.na(covariateIds))) {
        covariateIds <- as.numeric(unlist(strsplit(covariateIds, ",")))
        analysis$includedCovariateIds <- covariateIds
      }
      analysis$includedCovariateConceptIds <- includedCovariateConceptIds
      analysis$addDescendantsToInclude <- addDescendantsToInclude
      analysis$excludedCovariateConceptIds <- excludedCovariateConceptIds
      analysis$addDescendantsToExclude <- addDescendantsToExclude
      analysis$includedCovariateIds <- unique(c(
        analysis$includedCovariateIds,
        includedCovariateIds
      ))
      return(analysis)
    } else {
      return(NULL)
    }
  }
  analyses <- lapply(covariateSettings$analyses, filterBySpecs)
  analyses <- analyses[which(!sapply(analyses, is.null))]
  covariateSettings$analyses <- analyses
  return(covariateSettings)
}
OHDSI/FeatureExtraction documentation built on March 7, 2024, 2:47 p.m.