R/build-covariates.R

Defines functions createoutcomeCountCovariateSettings getDboutcomeCountCovariateData createMiHistCovariateSettings getMiHistCovariateData createLooCovariateSettings getDbLooCovariateData

#########################################
#### CREATE CUSTOM COVARIATE BUILDERS ###
#########################################

# This script creates several custom covariate builders, namely for:
# - MI prior to cohort entry
# - Obesity at cohort entry
# - Length of follow-up
# - Healthcare utilisation counts in follow-up - hospital, outpatient, office, emergency room
# - Number of COPD exacerbations by severity in follow-up (moderate, severe)


### OUTCOME COUNTS ----

# code works for all outcomes for which cohorts have been defined - currently healthcare utilisation counts and COPD exacerbations
# there are probably better ways of generating outputs for multiple concepts using these builders.

createoutcomeCountCovariateSettings <- function(useoutcomeCount = TRUE, analysisName = NULL, covariateID = NULL, analysisID = NULL, cohortID_out = NULL, futime = 10^6) {
  covariateSettings <- list(useoutcomeCount = useoutcomeCount, analysisName = analysisName, covariateID = covariateID, analysisID = analysisID, cohortID_out = cohortID_out, futime = futime)
  attr(covariateSettings, "fun") <- "getDboutcomeCountCovariateData"
  class(covariateSettings) <- "covariateSettings"
  return(covariateSettings)
}

getDboutcomeCountCovariateData <- function(connection,
                                           oracleTempSchema = NULL,
                                           cdmDatabaseSchema,
                                           cohortTable = "#cohort_person",
                                           cohortId = -1,
                                           cdmVersion = "5",
                                           rowIdField = "subject_id",
                                           covariateSettings,
                                           aggregated = FALSE) {
  writeLines("Constructing outcome count variable")
  if (covariateSettings$useoutcomeCount == FALSE) {
    return(NULL)
  }
  if (aggregated)
    stop("Aggregation not supported")
  # Some SQL to construct the covariate:
  sql <- "
  SELECT d1.@row_id_field AS row_id, @covariate_id AS covariate_id, COUNT(*) AS covariate_value
  FROM @cohort_table d1
  LEFT JOIN @cohort_table d2
  ON d1.subject_id = d2.subject_id
  WHERE d2.cohort_definition_id = @cohort_id_out
  AND d1.cohort_definition_id = @cohort_id
  AND d2.cohort_start_date > d1.cohort_start_date
  AND (d2.cohort_start_date - d1.cohort_start_date) <= @fu_time
  GROUP BY d1.subject_id
  "
  sql <- SqlRender::render(sql,
                           cohort_table = cohortTable,
                           cohort_id = cohortId,
                           cohort_id_out = covariateSettings$cohortID_out,
                           row_id_field = rowIdField,
                           covariate_id = covariateSettings$covariateID,
                           fu_time = covariateSettings$futime)
  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
  # Retrieve the covariate:
  covariates <- DatabaseConnector::querySql.ffdf(connection, sql)
  # Convert colum names to camelCase:
  colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
  # Construct covariate reference:
  covariateRef <- data.frame(covariateId = covariateSettings$covariateID,
                             covariateName = covariateSettings$analysisName,
                             analysisId = covariateSettings$analysisID,
                             conceptId = 0)
  covariateRef <- ff::as.ffdf(covariateRef)
  # Construct analysis reference:
  analysisRef <- data.frame(analysisId = covariateSettings$analysisID,
                            analysisName = covariateSettings$analysisName,
                            domainId = "Visit",
                            startDay = NA,
                            endDay = NA,
                            isBinary = "N",
                            missingMeansZero = "Y")
  analysisRef <- ff::as.ffdf(analysisRef)
  # Construct analysis reference:
  metaData <- list(sql = sql, call = match.call())
  result <- list(covariates = covariates,
                 covariateRef = covariateRef,
                 analysisRef = analysisRef,
                 metaData = metaData)
  class(result) <- "covariateData"
  return(result)
}


### PREVIOUS MI ----

# this can be generalised to other conditions if required.

# function for creating covariate settings
createMiHistCovariateSettings <- function(useHistoryOfMI = TRUE, covariateID = NULL, analysisID = NULL) {
  covariateSettings <- list(useHistoryOfMI = useHistoryOfMI, covariateID = covariateID, analysisID = analysisID)
  attr(covariateSettings, "fun") <- "getMiHistCovariateData"
  class(covariateSettings) <- "covariateSettings"
  return(covariateSettings)
}

# function for deriving covariate data
# nec to export b/c otherwsie getdbcovariatedata does not recognise it
#' @export
getMiHistCovariateData <- function(connection,
                                  oracleTempSchema = NULL,
                                  cdmDatabaseSchema,
                                  cohortTable = "#cohort_person",
                                  cohortId = -1,
                                  cdmVersion = "5",
                                  rowIdField = "subject_id",
                                  covariateSettings,
                                  aggregated = FALSE) {
  writeLines("Constructing history of MI covariate")
  if (covariateSettings$useHistoryOfMI == FALSE) {
    return(NULL)
  }
  if (aggregated)
    stop("Aggregation not supported") # why not? can i remove/amend this?
  # Some SQL to construct the covariate:
  sql <- "
SELECT @row_id_field AS row_id, @covariate_id AS covariate_id, CAST (condition_start_date < cohort_start_date AND condition_start_date IS NOT NULL AS INTEGER) AS covariate_value
  FROM (
  SELECT subject_id, cohort_definition_id, MIN(cohort_start_date) AS cohort_start_date, MIN(condition_start_date) AS condition_start_date
  FROM @cohort_table c
  LEFT JOIN (
  SELECT person_id, condition_concept_id, condition_start_date
  FROM @cdm_database_schema.condition_occurrence
  INNER JOIN @cdm_database_schema.concept_ancestor
  ON descendant_concept_id = condition_concept_id
  WHERE ancestor_concept_id IN (4329847) --- SNOMED concept for MI (all descendants)
  ) d1
  ON c.subject_id = d1.person_id AND  cohort_definition_id = @cohort_id
  GROUP BY subject_id, cohort_definition_id
  ) d2"
  sql <- SqlRender::render(sql,
                           cohort_table = cohortTable,
                           cohort_id = cohortId,
                           row_id_field = rowIdField,
                           cdm_database_schema = cdmDatabaseSchema,
                           covariate_id = covariateSettings$covariateID)
  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
  # Retrieve the covariate:
  covariates <- DatabaseConnector::querySql.ffdf(connection, sql)
  # Convert colum names to camelCase:
  colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
  # Construct covariate reference:
  covariateRef <- data.frame(covariateId = covariateSettings$covariateID,
                             covariateName = "Previous MI",
                             analysisId = covariateSettings$analysisID,
                             conceptId = 0)
  covariateRef <- ff::as.ffdf(covariateRef)
  # Construct analysis reference:
  analysisRef <- data.frame(analysisId = covariateSettings$analysisID,
                            analysisName = "Previous MI",
                            domainId = "Condition",
                            startDay = NA, #0
                            endDay = NA, #0
                            isBinary = "Y",
                            missingMeansZero = NA)
  analysisRef <- ff::as.ffdf(analysisRef)
  # Construct analysis reference:
  metaData <- list(sql = sql, call = match.call())
  result <- list(covariates = covariates,
                 covariateRef = covariateRef,
                 analysisRef = analysisRef,
                 metaData = metaData)
  class(result) <- "covariateData"
  return(result)
}

### DURATION OF FOLLOW-UP ----

createLooCovariateSettings <- function(useLengthOfObs = TRUE, covariateID = NULL, analysisID = NULL) {
  covariateSettings <- list(useLengthOfObs = useLengthOfObs, covariateID = covariateID, analysisID = analysisID)
  attr(covariateSettings, "fun") <- "getDbLooCovariateData"
  class(covariateSettings) <- "covariateSettings"
  return(covariateSettings)
}

#' @export
getDbLooCovariateData <- function(connection,
                                  oracleTempSchema = NULL,
                                  cdmDatabaseSchema,
                                  cohortTable = "#cohort_person",
                                  cohortId = -1,
                                  cdmVersion = "5",
                                  rowIdField = "subject_id",
                                  covariateSettings,
                                  aggregated = FALSE) {
  writeLines("Constructing length of follow-up covariates")
  if (covariateSettings$useLengthOfObs == FALSE) {
    return(NULL)
  }
  if (aggregated)
    stop("Aggregation not supported")
  # Some SQL to construct the covariate:
  sql <- paste("SELECT @row_id_field AS row_id, @covariate_id AS covariate_id,",
               "CAST(cohort_end_date - cohort_start_date AS NUMERIC) / 365",
               "AS covariate_value",
               "FROM @cohort_table c",
               "WHERE cohort_definition_id = @cohort_id")
  sql <- SqlRender::render(sql,
                           cohort_table = cohortTable,
                           cohort_id = cohortId,
                           row_id_field = rowIdField,
                           covariate_id = covariateSettings$covariateID,
                           cdm_database_schema = cdmDatabaseSchema)
  sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
  # Retrieve the covariate:
  covariates <- DatabaseConnector::querySql.ffdf(connection, sql)
  # Convert colum names to camelCase:
  colnames(covariates) <- SqlRender::snakeCaseToCamelCase(colnames(covariates))
  # Construct covariate reference:
  covariateRef <- data.frame(covariateId = covariateSettings$covariateID,
                             covariateName = "Length of follow-up (years)",
                             analysisId = covariateSettings$analysisID,
                             conceptId = 0)
  covariateRef <- ff::as.ffdf(covariateRef)
  # Construct analysis reference:
  analysisRef <- data.frame(analysisId = covariateSettings$analysisID,
                            analysisName = "Length of follow-up (years)",
                            domainId = "Demographics",
                            startDay = NA,
                            endDay = NA,
                            isBinary = "N",
                            missingMeansZero = "Y")
  analysisRef <- ff::as.ffdf(analysisRef)
  # Construct analysis reference:
  metaData <- list(sql = sql, call = match.call())
  result <- list(covariates = covariates,
                 covariateRef = covariateRef,
                 analysisRef = analysisRef,
                 metaData = metaData)
  class(result) <- "covariateData"
  return(result)
}
seamuskent/ehdenHtaCopd documentation built on Feb. 22, 2020, 8:14 a.m.