RiskStratifiedEstimation: Risk Stratified Effect Estimation

Documented in createAnalysisSettings createCreateIPWArgs createCreatePsArgs createDatabaseSettings createGetCmDataArgs createGetCovariateSettings createGetDataSettings createGetPlpDataArgs createMatchOnPsArgs createPopulationSettings createRunCmAnalysesArgs createRunCmSettingsArgs createRunExistingPlpSettingsArgs createRunPlpAnalysesArgs createRunPlpSettingsArgs createRunSettings createStratifyByPsArgs

# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of RiskStratifiedEstimation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# @author Observational Health Data Sciences and Informatics
# @author Alexandros Rekkas
# @author Peter Rijnbeek


#' Create a parameter object for the function runPlp
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param outcomeId              The outcomeId for the prediction.
#' @param splitSettings          An object of type splitSettings that specifies
#'                               how to split the data into train/validation/test.
#'                               The default settings can be created using
#'                               \code{\link[PatientLevelPrediction]{createDefaultSplitSetting}}
#' @param sampleSettings         An object of type \code{sampleSettings} that specifies any
#'                               under/over sampling to be done. Should be created with
#'                               \code{\link[PatientLevelPrediction]{createSampleSettings}}
#' @param featureEngineeringSettings An object of \code{featureEngineeringSettings} specifying
#'                                   any feature engineering to be learned (using the train data)
#' @param preprocessSettings      An object of \code{preprocessSettings}. This setting specifies
#'                                the minimum fraction of target population who must have
#'                                a covariate for it to be included in the model training
#'                                and whether to normalise the covariates before training.
#'                                Should be created with
#'                                \code{\link[PatientLevelPrediction]{createPreprocessSettings}}.
#' @param modelSettings           An object of class \code{modelSettings}.
#' @param logSettings             An object of \code{logSettings} created using
#'                                \code{\link[PatientLevelPrediction]{createLogSettings}} specifying
#'                                how the logging is done.
#' @param executeSettings         An object of type \code{executeSettings} specifying which parts
#'                                of the analysis to run. Should be created using
#'                                \code{\link[PatientLevelPrediction]{createExecuteSettings}}.
#' @param matchingSettings       The settings for the construction of the population on which the
#'                               prediction model will be developed.
#' @param timepoint              The timepoint to predict risk (survival models only)
#'
#' @export
#'
createRunPlpAnalysesArgs <- function(
    outcomeId                  = NULL,
    splitSettings              = PatientLevelPrediction::createDefaultSplitSetting(),
    sampleSettings             = PatientLevelPrediction::createSampleSettings(),
    featureEngineeringSettings = PatientLevelPrediction::createFeatureEngineeringSettings(),
    preprocessSettings         = PatientLevelPrediction::createPreprocessSettings(),
    modelSettings              = PatientLevelPrediction::setLassoLogisticRegression(),
    logSettings                = PatientLevelPrediction::createLogSettings(),
    executeSettings            = PatientLevelPrediction::createDefaultExecuteSettings(),
    matchingSettings           = CohortMethod::createMatchOnPsArgs(),
    timepoint                  = NULL
) {

  # First: get default values:
  analysis <- list()
  for (name in names(formals(createRunPlpAnalysesArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createRunPlpAnalysesArgs"
  class(analysis) <- "runPlpAnalysesArgs"
  return(analysis)
}


#' @title                   Create run settings for prediction
#' @param analyses          A list with settings for the prediction of each of
#'                          the risk stratification outcomes.
#'
#' @param defaultSettings  An object of type \code{runPlpAnalysesArgs} used as
#'                         the default settings for outcome risk prediction. Should
#'                         be created with
#'                         \code{\link[RiskStratifiedEstimation]{createRunPlpAnalysesArgs}}
#' @return                 An object of type \code{runPlpSettingsArgs} to be passed
#'                         to \code{\link[RiskStratifiedEstimation]{createRunSettings}}
#'
#' @export
createRunPlpSettingsArgs <- function(
    analyses = list(),
    defaultSettings = createRunPlpAnalysesArgs()
) {

  # First: get default values:
  analysis <- list()
  for (name in names(formals(createRunPlpSettingsArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createRunPlpSettingsArgs"
  class(analysis) <- "runPlpSettingsArgs"
  return(analysis)

}



#' @title Create prediction settings for an existing model
#' @param outcomeId              The outcome ID of the existing model.
#'
#' @param plpResultDirectory     The directory where the existing model is store.d
#' @param predictionSettings     A list with prediction settings.
#'
#' @export
createRunExistingPlpSettingsArgs <- function(
    outcomeId,
    plpResultDirectory,
    predictionSettings = list()
) {

  analysis <- list()
  for (name in names(formals(createRunExistingPlpSettingsArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createRunExistingPlpSettingsArgs"
  class(analysis) <- "runExistingPlpSettingsArgs"
  return(analysis)

}




#' Create a settings object for running the analyses Create the settings for running the analyses. The
#' input consists of two parts: 1) the settings for running the prediction algorithms and 2) the
#' settings for estimating treatment effects within strata of predicted risk.
#'
#' @param runPlpSettings   A parameterer object of type \code{runPlpSettingsArgs} defined using the
#'                         function \code{\link[RiskStratifiedEstimation]{createRunPlpSettingsArgs}}.
#' @param runCmSettings    A parameter object of type \code{runCmSettingsArgs} defined using the
#'                         function \code{\link[RiskStratifiedEstimation]{createRunCmSettingsArgs}}
#'
#' @return
#' An R object of type \code{runSettings}
#' @export

createRunSettings <- function(
    runPlpSettings = createRunPlpSettingsArgs(),
    runCmSettings = createRunCmSettingsArgs()
) {

  res <- list(runPlpSettings = runPlpSettings, runCmSettings = runCmSettings)
  attr(res, "fun") <- "createRunSettings"
  class(res) <- "runSettings"
  return(res)

}



#' Create a parameter object for the function getPlpData
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param studyStartDate               A calendar date specifying the minimum date that a cohort index
#'                                     date can appear. Date format is 'yyyymmdd'.
#' @param studyEndDate                 A calendar date specifying the maximum date that a cohort index
#'                                     date can appear. Date format is 'yyyymmdd'. Important: the
#'                                     studyEnd data is also used to truncate risk windows, meaning no
#'                                     outcomes beyond the study end date will be considered.
#' @param firstExposureOnly            Should only the first exposure per subject be included? Note
#'                                     that this is typically done in the createStudyPopulation
#'                                     function, but can already be done here for efficiency reasons.
#' @param washoutPeriod                The mininum required continuous observation time prior to index
#'                                     date for a person to be included in the at risk cohort. Note
#'                                     that this is typically done in the createStudyPopulation
#'                                     function,but can already be done here for efficiency reasons.
#' @param excludeDrugsFromCovariates   A redundant option
#'
#' @export
createGetPlpDataArgs <- function(studyStartDate = "",
                                 studyEndDate = "",
                                 firstExposureOnly = FALSE,
                                 washoutPeriod = 0,
                                 excludeDrugsFromCovariates = FALSE) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createGetPlpDataArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createGetPlpDataArgs"
  class(analysis) <- "args"
  return(analysis)
}

#' Create a parameter object for the function getDbCohortMethodData
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param studyStartDate               A calendar date specifying the minimum date that a cohort index
#'                                     date can appear. Date format is 'yyyymmdd'.
#' @param studyEndDate                 A calendar date specifying the maximum date that a cohort index
#'                                     date can appear. Date format is 'yyyymmdd'. Important: the study
#'                                     end data is also used to truncate risk windows, meaning no
#'                                     outcomes beyond the study end date will be considered.
#' @param excludeDrugsFromCovariates   Should the target and comparator drugs (and their descendant
#'                                     concepts) be excluded from the covariates? Note that this will
#'                                     work if the drugs are actualy drug concept IDs (and not
#'                                     cohortIDs).
#' @param firstExposureOnly            Should only the first exposure per subject be included? Note
#'                                     that this is typically done in the
#'                                     createStudyPopulationfunction, but can already be done here for
#'                                     efficiency reasons.
#' @param removeDuplicateSubjects      Remove subjects that are in both the target and comparator
#'                                     cohort? See details for allowed values.Note that this is
#'                                     typically done in the createStudyPopulation function, but can
#'                                     already be done here for efficiency reasons.
#' @param restrictToCommonPeriod       Restrict the analysis to the period when both treatments are
#'                                     observed?
#' @param washoutPeriod                The mininum required continuous observation time prior to index
#'                                     date for a person to be included in the cohort. Note that thisis
#'                                     typically done in the createStudyPopulation function, but can
#'                                     already be done here for efficiency reasons.
#' @param maxCohortSize                If either the target or the comparator cohort is larger than
#'                                     this number it will be sampled to this size. maxCohortSize = 0
#'                                     indicates no maximum size.
#'
#' @export
createGetCmDataArgs <- function(studyStartDate = "",
                                studyEndDate = "",
                                excludeDrugsFromCovariates = TRUE,
                                firstExposureOnly = FALSE,
                                removeDuplicateSubjects = FALSE,
                                restrictToCommonPeriod = FALSE,
                                washoutPeriod = 0,
                                maxCohortSize = 0) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createGetCmDataArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createGetCmDataArgs"
  class(analysis) <- "args"
  return(analysis)
}









#' Create a parameter object for the function createPs
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param excludeCovariateIds       Exclude these covariates from the propensity model.
#' @param includeCovariateIds       Include only these covariates in the propensity model.
#' @param maxCohortSizeForFitting   If the target or comparator cohort are larger than this number,
#'                                  they will be downsampled before fitting the propensity model. The
#'                                  model will be used to compute propensity scores for all subjects.
#'                                  The purpose of the sampling is to gain speed. Setting this number
#'                                  to 0 means no downsampling will be applied.
#' @param errorOnHighCorrelation    If true, the function will test each covariate for correlation with
#'                                  the treatment assignment. If any covariate has an unusually
#'                                  highcorrelation (either positive or negative), this will throw an
#'                                  error.
#' @param stopOnError               If an error occurrs, should the function stop? Else, the two
#'                                  cohorts will be assumed to be perfectly separable.
#' @param prior                     The prior used to fit the model. See createPrior for details.
#' @param control                   The control object used to control the cross-validation used to
#'                                  determine the hyperparameters of the prior (if applicable). See
#'                                  createControl for details.
#'
#' @return
#' A parameter object for creating propensity scores.
#'
#' @export

createCreatePsArgs <- function(
    excludeCovariateIds = c(),
    includeCovariateIds = c(),
    maxCohortSizeForFitting = 250000,
    errorOnHighCorrelation = TRUE,
    stopOnError = TRUE,
    prior = createPrior(
      "laplace",
      exclude = c(0),
      useCrossValidation = TRUE
    ),
    control = createControl(
      noiseLevel = "silent",
      cvType = "auto",
      tolerance = 2e-07,
      cvRepetitions = 10,
      startingVariance = 0.01
    )
) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createCreatePsArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  attr(analysis, "fun") <- "createCreatePsArgs"
  class(analysis) <- "args"
  return(analysis)
}




#' Create a parameter object for defining the estimation analyses.
#'
#' @param label                         A single-word description of the analysis.
#' @param riskStratificationMethod      Can be "equal" for equal-sized risk strata;
#'                                      "quantile" for user-specified risk quantiles
#'                                      as thresholds; "custom" for user-specified risk
#'                                      thresholds
#' @param stratificationOutcomes        The outcome IDs on which risk stratification
#'                                      will be carried out. Should be a subset
#'                                      of the stratification outcomes defined
#'                                      in the `analysisSettings`. Default is "all".
#' @param riskStratificationThresholds  The thresholds to be used for risk stratification.
#'                                      If `riskStratificationMethod` is "equal" then
#'                                      it should be a single number denoting the number
#'                                      of equal-sized risk subgroups; if `riskStratificationMethod`
#'                                      is "quantile", then it is a vector from 0 to 1 (increasing);
#'                                      if `riskStratificationMethod` is "custom", then it is a vector
#'                                      with the predicted probabilities to be used as thresholds
#' @param psMethod                      How should the propensity scores be used? Can
#'                                      be one of "stratifyByPs" or "matchOnPs".
#' @param effectEstimationSettings      Parameter object providing further settings
#'                                      for the implementation of selected \code{psMethod}
#'                                      to the estimation process. Can be created using
#'                                      one of
#'                                      \code{\link[RiskStratifiedEstimation]{createStratifyByPsArgs}}
#'                                      when \code{stratifyByPs} is selected or
#'                                      \code{\link[RiskStratifiedEstimation]{createMatchOnPsArgs}}
#'                                      when \code{matchOnPs} is selected.
#' @param timePoint                     The time point after cohort start that absolute differences
#'                                      should be estimated.
#'
#' @return
#' A parameter object for running the the estimation step.
#'
#' @export

createRunCmAnalysesArgs <- function(
    label                        = NULL,
    riskStratificationMethod     = "equal",
    stratificationOutcomes       = "all",
    riskStratificationThresholds = 4,
    psMethod                     = "stratifyByPs",
    effectEstimationSettings     = createStratifyByPsArgs(),
    timePoint                    = 365
) {

  if (is.null(label)) {
    label <- psMethod
  }

  res <- list(
    label                        = label,
    riskStratificationMethod     = riskStratificationMethod,
    riskStratificationThresholds = riskStratificationThresholds,
    stratificationOutcomes       = stratificationOutcomes,
    psMethod                     = psMethod,
    effectEstimationSettings     = effectEstimationSettings,
    timePoint                    = timePoint
  )

  class(res) <- "args"

  return(res)

}




#' Create a parameter object for running the estimation step
#'
#' @description
#' Create a parameter object for running the estimation step. This function is
#' used to create the computational part of the input of
#' \code{\link[RiskStratifiedEstimation]{createRunSettings}}.
#'
#' @param analyses                  A list of the analyses to run. Each element of
#'                                  the list can be creaeted using
#'                                  \code{\link[RiskStratifiedEstimation]{createRunCmAnalysesArgs}}
#'                                  \code{\link[CohortMethod]{createPs}}
#' @param psSettings                The settings for estimating the propensity scores
#' @param createPsThreads           The number of parallel threads for the
#'                                  estimation of the propensity scores. Default is 1.
#' @param fitOutcomeModelsThreads   The number of parallel threads for the estimation of the
#'                                  outcome models.
#' @param balanceThreads            The number of parallel threads for the estimation
#'                                  of covariate balance
#' @param negativeControlThreads    The number of parallel threads for the negative
#'                                  control analyses
#' @param runRiskStratifiedNcs      Should risk stratified negative control analyses
#'                                  be performed? Default is FALSE as it can take
#'                                  a very long time to complete.
#'
#'
#' @return
#' A parameter object for running the the estimation step.
#' @export

createRunCmSettingsArgs <- function(
    analyses,
    psSettings                   = createCreatePsArgs(),
    createPsThreads              = 1,
    fitOutcomeModelsThreads      = 1,
    balanceThreads               = 1,
    negativeControlThreads       = 1,
    runRiskStratifiedNcs         = FALSE
) {

  res <- list(
    analyses                = analyses,
    psSettings              = psSettings,
    createPsThreads         = createPsThreads,
    fitOutcomeModelsThreads = fitOutcomeModelsThreads,
    balanceThreads          = balanceThreads,
    negativeControlThreads  = negativeControlThreads,
    runRiskStratifiedNcs    = runRiskStratifiedNcs
  )
  class(res) <- "args"

  return(res)
}




#' Create a parameter object for the function stratifyByPs
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param numberOfStrata          How many strata? The boundaries of the strata are automatically
#'                                defined to contain equal numbers of target persons.
#' @param stratificationColumns   Names of one or more columns in the data data.frame on which subjects
#'                                should also be stratified in addition to stratification on propensity
#'                                score.
#' @param baseSelection           What is the base selection of subjects where the strata bounds areto
#'                                be determined? Strata are defined as equally-sized strata inside this
#'                                selection. Possible values are "all", "target", and "comparator".
#'
#' @export

createStratifyByPsArgs <- function(
    numberOfStrata = 5,
    stratificationColumns = c(),
    baseSelection = "all"
) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createStratifyByPsArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  class(analysis) <- "args"
  return(analysis)
}




#' Create a parameter object for the function matchOnPs
#'
#' @details
#' Create an object defining the parameter values.
#'
#' @param caliper                 The caliper for matching. A caliper is the distance which is
#'                                acceptable for any match. Observations which are outside of the
#'                                caliper are dropped. A caliper of 0 means no caliper is used.
#' @param caliperScale            The scale on which the caliper is defined. Three scales are
#'                                supported: caliperScale = 'propensity score', caliperScale
#'                                ='standardized', or caliperScale = 'standardized logit'. On the
#'                                standardized scale, the caliper is interpreted in standard deviations
#'                                of the propensity score distribution. 'standardized logit'is similar,
#'                                except that the propensity score is transformed to the logitscale
#'                                because the PS is more likely to be normally distributed on that
#'                                scale(Austin, 2011).
#' @param maxRatio                The maximum number of persons int the comparator arm to be matched to
#'                                each person in the treatment arm. A maxRatio of 0 means no
#'                                maximum:all comparators will be assigned to a target person.
#' @param stratificationColumns   Names or numbers of one or more columns in the data data.frameon
#'                                which subjects should be stratified prior to matching. No persons
#'                                will be matched with persons outside of the strata identified by the
#'                                values in these columns.
#'
#' @export

createMatchOnPsArgs <- function(
    caliper = 0.2,
    caliperScale = "standardized logit",
    maxRatio = 1,
    stratificationColumns = c()
) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createMatchOnPsArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  class(analysis) <- "args"
  return(analysis)
}




#' Create a parameter object for the function createIPW Create an object defining the parameter
#' values.
#'
#' @param weightsType            The type of the weights to be used. Allowed options are 'ATE' for
#'                               average treatment effect and 'ATT' for average treatment effect on the
#'                               treated weights
#' @param useStabilizedWeights   Should stabilized weights be used?
#' @param truncationLevels       The level of truncation expressed in percentiles of the propensity
#'                               score.
#'
#' @export

createCreateIPWArgs <- function(weightsType = "ATE",
                                useStabilizedWeights = TRUE,
                                truncationLevels = c(0.01, 0.99)) {
  # First: get default values:
  analysis <- list()
  for (name in names(formals(createCreateIPWArgs))) {
    analysis[[name]] <- get(name)
  }
  # Second: overwrite defaults with actual values:
  values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3)))
  for (name in names(values)) {
    if (name %in% names(analysis))
      analysis[[name]] <- values[[name]]
  }
  class(analysis) <- "args"
  return(analysis)
}




#' Create a parameter defining the performed risk stratified analysis
#'
#'
#' @param analysisId                The analysis ID.
#' @param description               Text describing the analysis.
#' @param databaseName              The name of the database.
#' @param treatmentCohortId         The cohort definition id of the treatment cohort
#'                                  in the cohortTable.
#' @param comparatorCohortId        The cohort definition id of the comparator cohort
#'                                  in the cohortTable.
#' @param outcomeIds                The cohort definition ids of the outcome cohorts
#'                                  in the outcomeTable.
#' @param analysisMatrix            Boolean matrix defining the outcomes to be assessed
#'                                  (rows) within risk strata (columns). The order
#'                                  in columns should match the the order of
#'                                  \code{outcomeIds}. Default is the diagonal matrix,
#'                                  which leads to the risk stratified assessment
#'                                  of only the outcome for which the risk strata
#'                                  were defined.
#' @param mapTreatments             Dataframe containing 2 columns: "exposure_id"
#'                                  with the id numbers of the
#'                                  treatment and comparator cohorts and "exposure_name"
#'                                  the cohort names.
#' @param mapOutcomes               Dataframe containing 2 columns: "outcome_id" with
#'                                  the cohort names of the outcomes of interest and
#'                                  "outcome_name" with their names.
#' @param negativeControlOutcomes   The outcome Ids to be used as negative controls
#' @param balanceThreads            The number of threads to be used for the estimation
#'                                  of covariate balance
#' @param negativeControlThreads    The number of threads to be used for running
#'                                  the negative control analyses
#' @param verbosity                 Sets the level of the verbosity. If the log level
#'                                  is at or higher in priority than the logger threshold,
#'                                  a message will print. The levels are:
#'                                  \itemize{
#'                                    \item {DEBUG}{Highest verbosity showing all debug statements}
#'                                    \item {TRACE}{Showing information about start and end of steps}
#'                                    \item {INFO}{Show informative information (Default)}
#'                                    \item {WARN}{Show warning messages}
#'                                    \item {ERROR}{Show error messages}
#'                                    \item {FATAL}{Be silent except for fatal errors}
#'                                  }
#' @param saveDirectory             The directory name where the results of the analyses will
#'                                  be stored.
#' @return
#' An analysisSettings object providing the identification information of the analysis.
#'
#' @export

createAnalysisSettings <- function(
    analysisId = NULL,
    description = "",
    databaseName,
    treatmentCohortId,
    comparatorCohortId,
    outcomeIds,
    analysisMatrix = diag(length(outcomeIds)),
    mapTreatments,
    mapOutcomes,
    negativeControlOutcomes = c(),
    balanceThreads = 1,
    negativeControlThreads = 1,
    verbosity = NULL,
    saveDirectory = NULL
) {
  res <- list(
    analysisId              = analysisId,
    description             = description,
    databaseName            = databaseName,
    treatmentCohortId       = treatmentCohortId,
    comparatorCohortId      = comparatorCohortId,
    outcomeIds              = outcomeIds,
    analysisMatrix          = analysisMatrix,
    mapTreatments           = mapTreatments,
    mapOutcomes             = mapOutcomes,
    negativeControlOutcomes = negativeControlOutcomes,
    balanceThreads          = balanceThreads,
    negativeControlThreads  = negativeControlThreads,
    verbosity               = verbosity,
    saveDirectory           = saveDirectory
  )

  attr(res, "fun") <- "createAnalysisSettings"
  class(res) <- "analysisSettings"

  return(res)

}




#' Create parameter object for extracting the data
#'
#'
#'
#' @param getPlpDataSettings       Parameter object for the extraction of the \code{plpData} object
#'                                 created from
#'                                 \code{\link[RiskStratifiedEstimation]{createGetPlpDataArgs}}.
#' @param getCmDataSettings        Parameter object for the extraction of the \code{cohortMethodData}
#'                                 object created from
#'                                 \code{\link[RiskStratifiedEstimation]{createGetPlpDataArgs}}.
#' @param plpDataFolder            The directory path where the \code{plpData} object has already been
#'                                 saved locally.
#' @param cohortMethodDataFolder   The directory path where the \code{cohortMethodData} object has
#'                                 already been saved locally.
#'
#' @export

createGetDataSettings <- function(getPlpDataSettings = createGetPlpDataArgs(),
                                  getCmDataSettings = createGetCmDataArgs(),
                                  plpDataFolder = NULL,
                                  cohortMethodDataFolder = NULL) {

  res <- list(getPlpDataSettings = getPlpDataSettings,
              getCmDataSettings = getCmDataSettings,
              plpDataFolder = plpDataFolder,
              cohortMethodDataFolder = cohortMethodDataFolder)

  attr(res, "fun") <- "createGetDataSettings"
  class(res) <- "getDataSettings"

  return(res)
}




#' Create parameter object for defining the analysis populations Contains the settings for defining
#' both the \code{populationPlp} and the \code{populationCm} objects.
#
#' @param populationPlpSettings   Parameter object for the definition of the \code{populationPlp}
#'                                object created from
#'                                \code{\link[PatientLevelPrediction]{createStudyPopulationSettings}}.
#' @param populationCmSettings    Parameter object for the definition of the \code{populationCm} object
#'                                created from
#'                                \code{\link[CohortMethod]{createCreateStudyPopulationArgs}}.
#' @param postProcessing          A function to run on the initial population after
#'                                creating the prediction and estimation populations
#'
#' @export

createPopulationSettings <- function(
    populationPlpSettings = PatientLevelPrediction::createStudyPopulationSettings(),
    populationCmSettings  = CohortMethod::createCreateStudyPopulationArgs(),
    postProcessing        = "none"
) {

  res <- list(
    populationPlpSettings = populationPlpSettings,
    populationCmSettings  = populationCmSettings,
    postProcessing        = postProcessing
  )


  attr(res, "fun") <- "createPopulationSettings"
  class(res)       <- "populationSettings"

  return(res)

}




#' Create parameter object for database to be reached
#'
#' @param cdmVersion               Define the OMOP CDM version
#' @param cdmDatabaseSchema        The name of the database schema that contains the OMOP CDM instance.
#'                                 Requires read permissions to this database. On SQL Server, this
#'                                 should specify both the database and the schema, so for example
#'                                 "cdm_instance.dbo"
#' @param databaseName             The name of the database.
#' @param cohortDatabaseSchema     The name of the database schema that is the location where the
#'                                 cohort data used to define the at risk cohort is available. If
#'                                 cohortTable = DRUG_ERA, \code{cohortDatabaseSchema} is not used by
#'                                 assumed to be \code{cdmDatabaseSchema}.
#' @param outcomeDatabaseSchema    The name of the database schema that is the location where the data
#'                                 used to define the outcome cohorts is available. If cohortTable =
#'                                 CONDITION_ERA, exposureDatabaseSchema is not used by assumed to be
#'                                 cdmSchema. Requires read permissions to this database.
#' @param resultsDatabaseSchema    The name of the database schema with write permissions.
#' @param exposureDatabaseSchema   Input of function \code{\link[CohortMethod]{getDbCohortMethodData}}:
#'                                 The name of the database schema that is the location where the
#'                                 exposure data used to define the exposure cohorts is available.
#' @param tempEmulationSchema      Some database platforms like Oracle and Impala do not truly support
#'                                 temp tables. To emulate temp tables, provide a schema with write
#'                                 privileges where temp tables can be created.
#' @param cohortTable              The tablename that contains the at risk cohort. If cohortTable <>
#'                                 DRUG_ERA, then expectation is cohortTable has format of COHORT
#'                                 table: cohort_concept_id, SUBJECT_ID, COHORT_START_DATE,
#'                                 COHORT_END_DATE.
#' @param outcomeTable             The tablename that contains the outcome cohorts. If outcomeTable <>
#'                                 CONDITION_OCCURRENCE, then expectation is outcomeTable has format of
#'                                 COHORT table: COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE,
#'                                 COHORT_END_DATE.
#' @param exposureTable            The tablename that contains the exposure cohorts. If exposureTable
#'                                 <> DRUG_ERA, then expectation is exposureTable has format of COHORT
#'                                 table: cohort_concept_id, SUBJECT_ID, COHORT_START_DATE,
#'                                 COHORT_END_DATE.
#' @param mergedCohortTable        The name of the table where the merged treatment and comparator
#'                                 cohorts will be stored.
#' @param targetCohortId           The cohort definition id of of the merged cohort in the
#'                                 \code{mergedCohortTable}.
#'
#' @export

createDatabaseSettings <- function(cdmVersion = "5",
                                   cdmDatabaseSchema,
                                   databaseName,
                                   cohortDatabaseSchema = cdmDatabaseSchema,
                                   outcomeDatabaseSchema = cdmDatabaseSchema,
                                   resultsDatabaseSchema = cdmDatabaseSchema,
                                   exposureDatabaseSchema = cdmDatabaseSchema,
                                   tempEmulationSchema = cdmDatabaseSchema,
                                   cohortTable,
                                   outcomeTable,
                                   exposureTable = "drug_era",
                                   mergedCohortTable,
                                   targetCohortId = 1) {

  res <- list(cdmVersion = cdmVersion,
              cdmDatabaseSchema = cdmDatabaseSchema,
              databaseName = databaseName,
              cohortDatabaseSchema = cohortDatabaseSchema,
              outcomeDatabaseSchema = outcomeDatabaseSchema,
              resultsDatabaseSchema = resultsDatabaseSchema,
              exposureDatabaseSchema = exposureDatabaseSchema,
              cohortTable = cohortTable,
              outcomeTable = outcomeTable,
              exposureTable = exposureTable,
              tempEmulationSchema = tempEmulationSchema,
              mergedCohortTable = mergedCohortTable,
              targetCohortId = targetCohortId)


  attr(res, "fun") <- "createDatabaseSettings"
  class(res) <- "databaseSettings"

  return(res)

}




#' Create the parameter object for extracting the covariates Contains the arguments for the extraction
#' of both the covariates related to the prediction step and the estimation step.
#'
#' @param covariateSettingsCm    The covariate settings object related to the estimation step created
#'                               from \code{\link[FeatureExtraction]{createCovariateSettings}}.
#' @param covariateSettingsPlp   The covariate settings objec related to the prediction step created
#'                               from \code{\link[FeatureExtraction]{createCovariateSettings}}.
#'
#' @export

createGetCovariateSettings <- function(covariateSettingsCm = FeatureExtraction::createCovariateSettings(),
                                       covariateSettingsPlp = FeatureExtraction::createCovariateSettings()) {

  res <- list(covariateSettingsCm = covariateSettingsCm,
              covariateSettingsPlp = covariateSettingsPlp)

  return(res)

}
OHDSI/RiskStratifiedEstimation documentation built on March 13, 2023, 8:16 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OHDSI/RiskStratifiedEstimation
Risk Stratified Effect Estimation

R/CreateArgFuns.R
In OHDSI/RiskStratifiedEstimation: Risk Stratified Effect Estimation

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/RiskStratifiedEstimation Risk Stratified Effect Estimation

R/CreateArgFuns.R In OHDSI/RiskStratifiedEstimation: Risk Stratified Effect Estimation

R Package Documentation

Browse R Packages

We want your feedback!

OHDSI/RiskStratifiedEstimation
Risk Stratified Effect Estimation

R/CreateArgFuns.R
In OHDSI/RiskStratifiedEstimation: Risk Stratified Effect Estimation