R/GetCovariatesFromCohortAttributes.R

Defines functions createCohortAttrCovariateSettings getDbCohortAttrCovariatesData

Documented in createCohortAttrCovariateSettings getDbCohortAttrCovariatesData

# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of FeatureExtraction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Getcovariate information from the database through the cohort_attribute table
#'
#' @description
#' Constructs covariates using the cohort_attribute table.
#'
#' @param covariateSettings   An object of type \code{covariateSettings} as created using the
#'                            \code{\link{createCohortAttrCovariateSettings}} function.
#'
#' @template GetCovarParams
#'
#' @examples
#' \donttest{
#' connectionDetails <- Eunomia::getEunomiaConnectionDetails()
#' Eunomia::createCohorts(
#'   connectionDetails = connectionDetails,
#'   cdmDatabaseSchema = "main",
#'   cohortDatabaseSchema = "main",
#'   cohortTable = "cohort"
#' )
#' connection <- DatabaseConnector::connect(connectionDetails)
#' covariateSettings <- createCohortAttrCovariateSettings(
#'   attrDatabaseSchema = "main",
#'   cohortAttrTable = "cohort_attribute",
#'   attrDefinitionTable = "attribute_definition",
#'   includeAttrIds = c(1),
#'   isBinary = FALSE,
#'   missingMeansZero = FALSE
#' )
#'
#' covData <- getDbCohortAttrCovariatesData(
#'   connection = connection,
#'   tempEmulationSchema = NULL,
#'   cdmDatabaseSchema = "main",
#'   cdmVersion = "5",
#'   cohortTable = "cohort",
#'   cohortIds = 1,
#'   rowIdField = "subject_id",
#'   covariateSettings = covariateSettings,
#'   aggregated = FALSE
#' )
#' }
#'
#' @export
getDbCohortAttrCovariatesData <- function(connection,
                                          oracleTempSchema = NULL,
                                          cdmDatabaseSchema,
                                          cohortTable = "#cohort_person",
                                          cohortId = -1,
                                          cohortIds = c(-1),
                                          cdmVersion = "5",
                                          rowIdField = "subject_id",
                                          covariateSettings,
                                          aggregated = FALSE,
                                          tempEmulationSchema = NULL) {
  if (aggregated) {
    stop("Aggregation not implemented for covariates from cohort attributes.")
  }
  if (cdmVersion == "4") {
    stop("Common Data Model version 4 is not supported")
  }
  if (!missing(cohortId)) {
    warning("cohortId argument has been deprecated, please use cohortIds")
    cohortIds <- cohortId
  }
  if (!is.null(oracleTempSchema) && oracleTempSchema != "") {
    rlang::warn("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.",
      .frequency = "regularly",
      .frequency_id = "oracleTempSchema"
    )
    tempEmulationSchema <- oracleTempSchema
  }
  start <- Sys.time()
  writeLines("Constructing covariates from cohort attributes table")

  if (is.null(covariateSettings$includeAttrIds) || length(covariateSettings$includeAttrIds) == 0) {
    hasIncludeAttrIds <- FALSE
  } else {
    if (!is.numeric(covariateSettings$includeAttrIds)) {
      stop("includeAttrIds must be a (vector of) numeric")
    }
    hasIncludeAttrIds <- TRUE
    DatabaseConnector::insertTable(connection,
      tableName = "#included_attr",
      data = data.frame(attribute_definition_id = as.integer(covariateSettings$includeAttrIds)),
      dropTableIfExists = TRUE,
      createTable = TRUE,
      tempTable = TRUE,
      tempEmulationSchema = tempEmulationSchema
    )
  }
  sql <- SqlRender::readSql(system.file("sql/sql_server/GetAttrCovariates.sql", package = "FeatureExtraction"))
  renderedSql <- SqlRender::render(
    sql = sql,
    attr_database_schema = covariateSettings$attrDatabaseSchema,
    cohort_table = cohortTable,
    row_id_field = rowIdField,
    cohort_attribute_table = covariateSettings$cohortAttrTable,
    has_include_attr_ids = hasIncludeAttrIds
  )
  renderedSql <- SqlRender::translate(
    sql = renderedSql,
    targetDialect = attr(connection, "dbms"),
    tempEmulationSchema = tempEmulationSchema
  )
  # renderedSql <- SqlRender::loadRenderTranslateSql("GetAttrCovariates.sql",
  #                                                  packageName = "FeatureExtraction",
  #                                                  dbms = attr(connection, "dbms"),
  #                                                  tempEmulationSchema = tempEmulationSchema,
  #                                                  attr_database_schema = covariateSettings$attrDatabaseSchema,
  #                                                  cohort_table = cohortTable,
  #                                                  row_id_field = rowIdField,
  #                                                  cohort_attribute_table = covariateSettings$cohortAttrTable,
  #                                                  has_include_attr_ids = hasIncludeAttrIds)

  covariates <- DatabaseConnector::querySql(connection, renderedSql, snakeCaseToCamelCase = TRUE)
  covariateRefSql <- "SELECT attribute_definition_id AS covariate_id, attribute_name AS covariate_name FROM @attr_database_schema.@attr_definition_table ORDER BY attribute_definition_id"
  covariateRefSql <- SqlRender::render(covariateRefSql,
    attr_database_schema = covariateSettings$attrDatabaseSchema,
    attr_definition_table = covariateSettings$attrDefinitionTable
  )
  covariateRefSql <- SqlRender::translate(
    sql = covariateRefSql,
    targetDialect = attr(connection, "dbms"),
    tempEmulationSchema = tempEmulationSchema
  )
  covariateRef <- DatabaseConnector::querySql(connection, covariateRefSql, snakeCaseToCamelCase = TRUE)
  covariateRef$analysisId <- rep(as.numeric(covariateSettings$analysisId), length = nrow(covariateRef))
  covariateRef$conceptId <- rep(0, length = nrow(covariateRef))

  analysisRef <- data.frame(
    analysisId = as.numeric(covariateSettings$analysisId),
    analysisName = "Covariates from cohort attributes",
    domainId = "Cohort",
    startDay = as.numeric(NA),
    endDay = as.numeric(NA),
    isBinary = ifelse(covariateSettings$isBinary, "Y", "N"),
    missingMeansZero = ifelse(covariateSettings$missingMeansZero, "Y", "N")
  )
  delta <- Sys.time() - start
  writeLines(paste("Loading took", signif(delta, 3), attr(delta, "units")))

  result <- createEmptyCovariateData(cohortIds, aggregated, covariateSettings$temporal)
  result$covariates <- covariates
  result$covariateRef <- covariateRef
  result$analysisRef <- analysisRef

  return(result)
}

#' Create cohort attribute covariate settings
#'
#' @details
#' Creates an object specifying where the cohort attributes can be found to construct covariates. The
#' attributes should be defined in a table with the same structure as the attribute_definition table
#' in the Common Data Model. It should at least have these columns: \describe{
#' \item{attribute_definition_id}{A unique identifier of type integer.} \item{attribute_name}{A short
#' description of the attribute.} } The cohort attributes themselves should be stored in a table with
#' the same format as the cohort_attribute table in the Common Data Model. It should at least have
#' these columns: \describe{ \item{cohort_definition_id}{A key to link to the cohort table.}
#' \item{subject_id}{A key to link to the cohort table.} \item{cohort_start_date}{A key to link to the
#' cohort table.} \item{attribute_definition_id}{An foreign key linking to the attribute definition
#' table.} \item{value_as_number}{A real number.} }
#'
#' @param analysisId            A unique identifier for this analysis.
#' @param attrDatabaseSchema    The database schema where the attribute definition and cohort attribute
#'                              table can be found.
#' @param attrDefinitionTable   The name of the attribute definition table.
#' @param cohortAttrTable       The name of the cohort attribute table.
#' @param includeAttrIds        (optional) A list of attribute definition IDs to restrict to.
#' @param isBinary              Needed for aggregation: Are these binary variables? Binary
#'                              variables should only have the values 0 or 1.
#' @param missingMeansZero      Needed for aggregation: For continuous values, should missing
#'                              values be interpreted as 0?
#'
#' @return
#' An object of type \code{covariateSettings}, to be used in other functions.
#'
#' @examples
#' \donttest{
#' covariateSettings <- createCohortAttrCovariateSettings(
#'   analysisId = 1,
#'   attrDatabaseSchema = "main",
#'   attrDefinitionTable = "attribute_definition",
#'   cohortAttrTable = "cohort_attribute",
#'   includeAttrIds = c(1),
#'   isBinary = FALSE,
#'   missingMeansZero = FALSE
#' )
#' }
#'
#' @export
createCohortAttrCovariateSettings <- function(analysisId = -1,
                                              attrDatabaseSchema,
                                              attrDefinitionTable = "attribute_definition",
                                              cohortAttrTable = "cohort_attribute",
                                              includeAttrIds = c(),
                                              isBinary = FALSE,
                                              missingMeansZero = FALSE) {
  # First: get the default values:
  covariateSettings <- list()
  for (name in names(formals(createCohortAttrCovariateSettings))) {
    covariateSettings[[name]] <- get(name)
  }
  # Next: overwrite defaults with actual values if specified:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(covariateSettings)) {
      covariateSettings[[name]] <- values[[name]]
    }
  }

  attr(covariateSettings, "fun") <- "getDbCohortAttrCovariatesData"
  class(covariateSettings) <- "covariateSettings"
  return(covariateSettings)
}

Try the FeatureExtraction package in your browser

Any scripts or data that you put into this service are public.

FeatureExtraction documentation built on Oct. 18, 2024, 9:06 a.m.